(feat) basic retries when hitting rate limit errors (#105)
* w Signed-off-by: Thibault Sottiaux <tibo@openai.com> * w Signed-off-by: Thibault Sottiaux <tibo@openai.com> * w Signed-off-by: Thibault Sottiaux <tibo@openai.com> * w Signed-off-by: Thibault Sottiaux <tibo@openai.com> * w Signed-off-by: Thibault Sottiaux <tibo@openai.com> --------- Signed-off-by: Thibault Sottiaux <tibo@openai.com>
This commit is contained in:
committed by
GitHub
parent
090140da09
commit
1c4e2e19ea
@@ -22,6 +22,12 @@ import { handleExecCommand } from "./handle-exec-command.js";
|
|||||||
import { randomUUID } from "node:crypto";
|
import { randomUUID } from "node:crypto";
|
||||||
import OpenAI, { APIConnectionTimeoutError } from "openai";
|
import OpenAI, { APIConnectionTimeoutError } from "openai";
|
||||||
|
|
||||||
|
// Fallback wait between rate-limit retries (ms) when no valid override is set.
const DEFAULT_RATE_LIMIT_RETRY_WAIT_MS = 15000;

// Largest delay timers accept (2^31 - 1 ms); anything above it is coerced to
// 1 ms by the runtime, which would defeat the back-off entirely.
const MAX_TIMER_DELAY_MS = 2147483647;

/**
 * Parse the rate-limit retry wait (in milliseconds) from a raw environment
 * variable value.
 *
 * @param raw - The raw string read from the environment, or `undefined` when
 *   the variable is not set.
 * @returns The parsed wait in milliseconds, clamped to the maximum timer
 *   delay. Falls back to the default when `raw` is missing, empty, not a
 *   number, or negative.
 */
function parseRateLimitRetryWaitMs(raw: string | undefined): number {
  const parsed = Number.parseInt(raw ?? "", 10);
  // `parseInt` yields NaN for input such as "abc"; a NaN or negative delay is
  // treated as ~1 ms by `setTimeout`, so retries would fire back-to-back.
  if (!Number.isFinite(parsed) || parsed < 0) {
    return DEFAULT_RATE_LIMIT_RETRY_WAIT_MS;
  }
  return Math.min(parsed, MAX_TIMER_DELAY_MS);
}

// Wait time before retrying after rate limit errors (ms). Can be overridden
// with the OPENAI_RATE_LIMIT_RETRY_WAIT_MS environment variable.
const RATE_LIMIT_RETRY_WAIT_MS = parseRateLimitRetryWaitMs(
  process.env["OPENAI_RATE_LIMIT_RETRY_WAIT_MS"],
);
|
||||||
|
|
||||||
export type CommandConfirmation = {
|
export type CommandConfirmation = {
|
||||||
review: ReviewDecision;
|
review: ReviewDecision;
|
||||||
applyPatch?: ApplyPatchCommand | undefined;
|
applyPatch?: ApplyPatchCommand | undefined;
|
||||||
@@ -479,8 +485,9 @@ export class AgentLoop {
|
|||||||
}
|
}
|
||||||
// Send request to OpenAI with retry on timeout
|
// Send request to OpenAI with retry on timeout
|
||||||
let stream;
|
let stream;
|
||||||
|
|
||||||
// Retry loop for transient errors. Up to MAX_RETRIES attempts.
|
// Retry loop for transient errors. Up to MAX_RETRIES attempts.
|
||||||
const MAX_RETRIES = 3;
|
const MAX_RETRIES = 5;
|
||||||
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
|
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
|
||||||
try {
|
try {
|
||||||
let reasoning: Reasoning | undefined;
|
let reasoning: Reasoning | undefined;
|
||||||
@@ -589,7 +596,18 @@ export class AgentLoop {
|
|||||||
this.onLoading(false);
|
this.onLoading(false);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isRateLimit) {
|
if (isRateLimit) {
|
||||||
|
if (attempt < MAX_RETRIES) {
|
||||||
|
log(
|
||||||
|
`OpenAI rate limit exceeded (attempt ${attempt}/${MAX_RETRIES}), retrying in ${RATE_LIMIT_RETRY_WAIT_MS} ms...`,
|
||||||
|
);
|
||||||
|
// eslint-disable-next-line no-await-in-loop
|
||||||
|
await new Promise((resolve) =>
|
||||||
|
setTimeout(resolve, RATE_LIMIT_RETRY_WAIT_MS),
|
||||||
|
);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
this.onItem({
|
this.onItem({
|
||||||
id: `error-${Date.now()}`,
|
id: `error-${Date.now()}`,
|
||||||
type: "message",
|
type: "message",
|
||||||
@@ -597,13 +615,14 @@ export class AgentLoop {
|
|||||||
content: [
|
content: [
|
||||||
{
|
{
|
||||||
type: "input_text",
|
type: "input_text",
|
||||||
text: "⚠️ Rate limit reached while contacting OpenAI. Please wait a moment and try again.",
|
text: "⚠️ Rate limit reached while contacting OpenAI. Please try again later.",
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
this.onLoading(false);
|
this.onLoading(false);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const isClientError =
|
const isClientError =
|
||||||
(typeof status === "number" &&
|
(typeof status === "number" &&
|
||||||
status >= 400 &&
|
status >= 400 &&
|
||||||
|
|||||||
@@ -1,57 +1,39 @@
|
|||||||
import { describe, it, expect, vi } from "vitest";
|
import { describe, it, expect, vi } from "vitest";
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Utility: fake OpenAI SDK with programmable behaviour per test case.
|
// Mock helpers
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
// Same helper as used in agent-network-errors.test.ts – duplicated here to keep
|
// Keep reference so test cases can programmatically change behaviour of the
|
||||||
// the test file self‑contained.
|
// fake OpenAI client.
|
||||||
// Exported so that the strict TypeScript compiler does not flag it as unused –
|
|
||||||
// individual tests may import it for ad‑hoc diagnostics when debugging.
|
|
||||||
export function _createStream(events: Array<any>) {
|
|
||||||
return new (class {
|
|
||||||
public controller = { abort: vi.fn() };
|
|
||||||
|
|
||||||
async *[Symbol.asyncIterator]() {
|
|
||||||
for (const ev of events) {
|
|
||||||
yield ev;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
})();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Holders so tests can access spies/state injected by the mock.
|
|
||||||
const openAiState: { createSpy?: ReturnType<typeof vi.fn> } = {};
|
const openAiState: { createSpy?: ReturnType<typeof vi.fn> } = {};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mock the "openai" package so we can simulate rate‑limit errors without
|
||||||
|
* making real network calls. The AgentLoop only relies on `responses.create`
|
||||||
|
* so we expose a minimal stub.
|
||||||
|
*/
|
||||||
vi.mock("openai", () => {
|
vi.mock("openai", () => {
|
||||||
class RateLimitError extends Error {
|
|
||||||
public code = "rate_limit_exceeded";
|
|
||||||
constructor(message: string) {
|
|
||||||
super(message);
|
|
||||||
this.name = "RateLimitError";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Re‑export the timeout error as well so other tests that expect it continue
|
|
||||||
// to work regardless of execution order.
|
|
||||||
class APIConnectionTimeoutError extends Error {}
|
|
||||||
|
|
||||||
class FakeOpenAI {
|
class FakeOpenAI {
|
||||||
public responses = {
|
public responses = {
|
||||||
// `createSpy` will be swapped out per test.
|
// Will be replaced per‑test via `openAiState.createSpy`.
|
||||||
create: (...args: Array<any>) => openAiState.createSpy!(...args),
|
create: (...args: Array<any>) => openAiState.createSpy!(...args),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The real SDK exports this constructor – include it for typings even
|
||||||
|
// though it is not used in this spec.
|
||||||
|
class APIConnectionTimeoutError extends Error {}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
__esModule: true,
|
__esModule: true,
|
||||||
default: FakeOpenAI,
|
default: FakeOpenAI,
|
||||||
RateLimitError,
|
|
||||||
APIConnectionTimeoutError,
|
APIConnectionTimeoutError,
|
||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
// Stub approvals / formatting helpers – not relevant to rate‑limit handling.
|
// Stub helpers that the agent indirectly imports so it does not attempt any
|
||||||
|
// file‑system access or real approvals logic during the test.
|
||||||
vi.mock("@lib/approvals.js", () => ({
|
vi.mock("@lib/approvals.js", () => ({
|
||||||
__esModule: true,
|
__esModule: true,
|
||||||
alwaysApprovedCommands: new Set<string>(),
|
alwaysApprovedCommands: new Set<string>(),
|
||||||
@@ -64,7 +46,7 @@ vi.mock("@lib/format-command.js", () => ({
|
|||||||
formatCommandForDisplay: (c: Array<string>) => c.join(" "),
|
formatCommandForDisplay: (c: Array<string>) => c.join(" "),
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Silence debug logging from agent‑loop so test output remains clean.
|
// Silence agent‑loop debug logging so test output stays clean.
|
||||||
vi.mock("../src/utils/agent/log.js", () => ({
|
vi.mock("../src/utils/agent/log.js", () => ({
|
||||||
__esModule: true,
|
__esModule: true,
|
||||||
log: () => {},
|
log: () => {},
|
||||||
@@ -73,55 +55,75 @@ vi.mock("../src/utils/agent/log.js", () => ({
|
|||||||
|
|
||||||
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
|
import { AgentLoop } from "../src/utils/agent/agent-loop.js";
|
||||||
|
|
||||||
describe("AgentLoop – OpenAI rate limit errors", () => {
|
describe("AgentLoop – rate‑limit handling", () => {
|
||||||
it("surfaces a user‑friendly system message instead of throwing on RateLimitError (TDD – expected to fail)", async () => {
|
it("retries up to the maximum and then surfaces a system message", async () => {
|
||||||
// Arrange fake OpenAI: every call fails with a rate‑limit error.
|
// Enable fake timers for this test only – we restore real timers at the end
|
||||||
const rateLimitErrMsg =
|
// so other tests are unaffected.
|
||||||
"Rate limit reached: Limit 20, Used 20, Requested 1. Please try again.";
|
vi.useFakeTimers();
|
||||||
|
|
||||||
openAiState.createSpy = vi.fn(async () => {
|
try {
|
||||||
// Simulate the SDK throwing before any streaming begins.
|
// Construct a dummy rate‑limit error that matches the implementation's
|
||||||
// In real life this happens when the HTTP response status is 429.
|
// detection logic (`status === 429`).
|
||||||
const err: any = new Error(rateLimitErrMsg);
|
const rateLimitErr: any = new Error("Rate limit exceeded");
|
||||||
err.code = "rate_limit_exceeded";
|
rateLimitErr.status = 429;
|
||||||
throw err;
|
|
||||||
});
|
|
||||||
|
|
||||||
const received: Array<any> = [];
|
// Always throw the rate‑limit error to force the loop to exhaust all
|
||||||
|
// retries (5 attempts in total).
|
||||||
|
openAiState.createSpy = vi.fn(async () => {
|
||||||
|
throw rateLimitErr;
|
||||||
|
});
|
||||||
|
|
||||||
const agent = new AgentLoop({
|
const received: Array<any> = [];
|
||||||
model: "any",
|
|
||||||
instructions: "",
|
|
||||||
approvalPolicy: { mode: "auto" } as any,
|
|
||||||
onItem: (i) => received.push(i),
|
|
||||||
onLoading: () => {},
|
|
||||||
getCommandConfirmation: async () => ({ review: "yes" } as any),
|
|
||||||
onLastResponseId: () => {},
|
|
||||||
});
|
|
||||||
|
|
||||||
const userMsg = [
|
const agent = new AgentLoop({
|
||||||
{
|
model: "any",
|
||||||
type: "message",
|
instructions: "",
|
||||||
role: "user",
|
approvalPolicy: { mode: "auto" } as any,
|
||||||
content: [{ type: "input_text", text: "hello" }],
|
onItem: (i) => received.push(i),
|
||||||
},
|
onLoading: () => {},
|
||||||
];
|
getCommandConfirmation: async () => ({ review: "yes" } as any),
|
||||||
|
onLastResponseId: () => {},
|
||||||
|
});
|
||||||
|
|
||||||
// The desired behaviour (not yet implemented): AgentLoop should catch the
|
const userMsg = [
|
||||||
// rate‑limit error, emit a helpful system message and resolve without
|
{
|
||||||
// throwing so callers can let the user retry.
|
type: "message",
|
||||||
await expect(agent.run(userMsg as any)).resolves.not.toThrow();
|
role: "user",
|
||||||
|
content: [{ type: "input_text", text: "hello" }],
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
// Let flush timers run.
|
// Start the run but don't await yet so we can advance fake timers while it
|
||||||
await new Promise((r) => setTimeout(r, 20));
|
// is in progress.
|
||||||
|
const runPromise = agent.run(userMsg as any);
|
||||||
|
|
||||||
const sysMsg = received.find(
|
// The agent waits 15 000 ms between retries (rate‑limit back‑off) and does
|
||||||
(i) =>
|
// this four times (after attempts 1‑4). Fast‑forward a bit more to cover
|
||||||
i.role === "system" &&
|
// any additional small `setTimeout` calls inside the implementation.
|
||||||
typeof i.content?.[0]?.text === "string" &&
|
await vi.advanceTimersByTimeAsync(61_000); // 4 * 15s + 1s safety margin
|
||||||
i.content[0].text.includes("Rate limit"),
|
|
||||||
);
|
|
||||||
|
|
||||||
expect(sysMsg).toBeTruthy();
|
// Ensure the promise settles without throwing.
|
||||||
|
await expect(runPromise).resolves.not.toThrow();
|
||||||
|
|
||||||
|
// Flush the 10 ms staging delay used when emitting items.
|
||||||
|
await vi.advanceTimersByTimeAsync(20);
|
||||||
|
|
||||||
|
// The OpenAI client should have been called the maximum number of retry
|
||||||
|
// attempts (5).
|
||||||
|
expect(openAiState.createSpy).toHaveBeenCalledTimes(5);
|
||||||
|
|
||||||
|
// Finally, verify that the user sees a helpful system message.
|
||||||
|
const sysMsg = received.find(
|
||||||
|
(i) =>
|
||||||
|
i.role === "system" &&
|
||||||
|
typeof i.content?.[0]?.text === "string" &&
|
||||||
|
i.content[0].text.includes("Rate limit reached"),
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(sysMsg).toBeTruthy();
|
||||||
|
} finally {
|
||||||
|
// Ensure global timer state is restored for subsequent tests.
|
||||||
|
vi.useRealTimers();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -152,7 +152,7 @@ describe("AgentLoop – automatic retry on 5xx errors", () => {
|
|||||||
|
|
||||||
await new Promise((r) => setTimeout(r, 20));
|
await new Promise((r) => setTimeout(r, 20));
|
||||||
|
|
||||||
expect(openAiState.createSpy).toHaveBeenCalledTimes(3);
|
expect(openAiState.createSpy).toHaveBeenCalledTimes(5);
|
||||||
|
|
||||||
const sysMsg = received.find(
|
const sysMsg = received.find(
|
||||||
(i) =>
|
(i) =>
|
||||||
|
|||||||
Reference in New Issue
Block a user