(feat) basic retries when hitting rate limit errors (#105)

* w

Signed-off-by: Thibault Sottiaux <tibo@openai.com>

* w

Signed-off-by: Thibault Sottiaux <tibo@openai.com>

* w

Signed-off-by: Thibault Sottiaux <tibo@openai.com>

* w

Signed-off-by: Thibault Sottiaux <tibo@openai.com>

* w

Signed-off-by: Thibault Sottiaux <tibo@openai.com>

---------

Signed-off-by: Thibault Sottiaux <tibo@openai.com>
This commit is contained in:
Thibault Sottiaux
2025-04-16 13:47:23 -07:00
committed by GitHub
parent 090140da09
commit 1c4e2e19ea
3 changed files with 100 additions and 79 deletions

View File

@@ -22,6 +22,12 @@ import { handleExecCommand } from "./handle-exec-command.js";
import { randomUUID } from "node:crypto"; import { randomUUID } from "node:crypto";
import OpenAI, { APIConnectionTimeoutError } from "openai"; import OpenAI, { APIConnectionTimeoutError } from "openai";
// Wait time before retrying after rate limit errors (ms).
const RATE_LIMIT_RETRY_WAIT_MS = parseInt(
process.env["OPENAI_RATE_LIMIT_RETRY_WAIT_MS"] || "15000",
10,
);
export type CommandConfirmation = { export type CommandConfirmation = {
review: ReviewDecision; review: ReviewDecision;
applyPatch?: ApplyPatchCommand | undefined; applyPatch?: ApplyPatchCommand | undefined;
@@ -479,8 +485,9 @@ export class AgentLoop {
} }
// Send request to OpenAI with retry on timeout // Send request to OpenAI with retry on timeout
let stream; let stream;
// Retry loop for transient errors. Up to MAX_RETRIES attempts. // Retry loop for transient errors. Up to MAX_RETRIES attempts.
const MAX_RETRIES = 3; const MAX_RETRIES = 5;
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) { for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
try { try {
let reasoning: Reasoning | undefined; let reasoning: Reasoning | undefined;
@@ -589,7 +596,18 @@ export class AgentLoop {
this.onLoading(false); this.onLoading(false);
return; return;
} }
if (isRateLimit) { if (isRateLimit) {
if (attempt < MAX_RETRIES) {
log(
`OpenAI rate limit exceeded (attempt ${attempt}/${MAX_RETRIES}), retrying in ${RATE_LIMIT_RETRY_WAIT_MS} ms...`,
);
// eslint-disable-next-line no-await-in-loop
await new Promise((resolve) =>
setTimeout(resolve, RATE_LIMIT_RETRY_WAIT_MS),
);
continue;
}
this.onItem({ this.onItem({
id: `error-${Date.now()}`, id: `error-${Date.now()}`,
type: "message", type: "message",
@@ -597,13 +615,14 @@ export class AgentLoop {
content: [ content: [
{ {
type: "input_text", type: "input_text",
text: "⚠️ Rate limit reached while contacting OpenAI. Please wait a moment and try again.", text: "⚠️ Rate limit reached while contacting OpenAI. Please try again later.",
}, },
], ],
}); });
this.onLoading(false); this.onLoading(false);
return; return;
} }
const isClientError = const isClientError =
(typeof status === "number" && (typeof status === "number" &&
status >= 400 && status >= 400 &&

View File

@@ -1,57 +1,39 @@
import { describe, it, expect, vi } from "vitest"; import { describe, it, expect, vi } from "vitest";
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Utility: fake OpenAI SDK with programmable behaviour per test case. // Mock helpers
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Same helper as used in agent-network-errors.test.ts — duplicated here to keep // Keep reference so test cases can programmatically change behaviour of the
// the test file self-contained. // fake OpenAI client.
// Exported so that the strict TypeScript compiler does not flag it as unused —
// individual tests may import it for ad-hoc diagnostics when debugging.
export function _createStream(events: Array<any>) {
return new (class {
public controller = { abort: vi.fn() };
async *[Symbol.asyncIterator]() {
for (const ev of events) {
yield ev;
}
}
})();
}
// Holders so tests can access spies/state injected by the mock.
const openAiState: { createSpy?: ReturnType<typeof vi.fn> } = {}; const openAiState: { createSpy?: ReturnType<typeof vi.fn> } = {};
/**
 * Mock the "openai" package so we can simulate rate-limit errors without
* making real network calls. The AgentLoop only relies on `responses.create`
* so we expose a minimal stub.
*/
vi.mock("openai", () => { vi.mock("openai", () => {
class RateLimitError extends Error {
public code = "rate_limit_exceeded";
constructor(message: string) {
super(message);
this.name = "RateLimitError";
}
}
// Re-export the timeout error as well so other tests that expect it continue
// to work regardless of execution order.
class APIConnectionTimeoutError extends Error {}
class FakeOpenAI { class FakeOpenAI {
public responses = { public responses = {
// `createSpy` will be swapped out per test. // Will be replaced per-test via `openAiState.createSpy`.
create: (...args: Array<any>) => openAiState.createSpy!(...args), create: (...args: Array<any>) => openAiState.createSpy!(...args),
}; };
} }
// The real SDK exports this constructor — include it for typings even
// though it is not used in this spec.
class APIConnectionTimeoutError extends Error {}
return { return {
__esModule: true, __esModule: true,
default: FakeOpenAI, default: FakeOpenAI,
RateLimitError,
APIConnectionTimeoutError, APIConnectionTimeoutError,
}; };
}); });
// Stub approvals / formatting helpers not relevant to rate-limit handling. // Stub helpers that the agent indirectly imports so it does not attempt any
// filesystem access or real approvals logic during the test.
vi.mock("@lib/approvals.js", () => ({ vi.mock("@lib/approvals.js", () => ({
__esModule: true, __esModule: true,
alwaysApprovedCommands: new Set<string>(), alwaysApprovedCommands: new Set<string>(),
@@ -64,7 +46,7 @@ vi.mock("@lib/format-command.js", () => ({
formatCommandForDisplay: (c: Array<string>) => c.join(" "), formatCommandForDisplay: (c: Array<string>) => c.join(" "),
})); }));
// Silence debug logging from agent-loop so test output remains clean. // Silence agent-loop debug logging so test output stays clean.
vi.mock("../src/utils/agent/log.js", () => ({ vi.mock("../src/utils/agent/log.js", () => ({
__esModule: true, __esModule: true,
log: () => {}, log: () => {},
@@ -73,55 +55,75 @@ vi.mock("../src/utils/agent/log.js", () => ({
import { AgentLoop } from "../src/utils/agent/agent-loop.js"; import { AgentLoop } from "../src/utils/agent/agent-loop.js";
describe("AgentLoop OpenAI rate limit errors", () => { describe("AgentLoop ratelimit handling", () => {
it("surfaces a userfriendly system message instead of throwing on RateLimitError (TDD expected to fail)", async () => { it("retries up to the maximum and then surfaces a system message", async () => {
// Arrange fake OpenAI: every call fails with a rate-limit error. // Enable fake timers for this test only — we restore real timers at the end
const rateLimitErrMsg = // so other tests are unaffected.
"Rate limit reached: Limit 20, Used 20, Requested 1. Please try again."; vi.useFakeTimers();
openAiState.createSpy = vi.fn(async () => { try {
// Simulate the SDK throwing before any streaming begins. // Construct a dummy rate-limit error that matches the implementation's
// In real life this happens when the HTTP response status is 429. // detection logic (`status === 429`).
const err: any = new Error(rateLimitErrMsg); const rateLimitErr: any = new Error("Rate limit exceeded");
err.code = "rate_limit_exceeded"; rateLimitErr.status = 429;
throw err;
});
const received: Array<any> = []; // Always throw the rate-limit error to force the loop to exhaust all
// retries (5 attempts in total).
openAiState.createSpy = vi.fn(async () => {
throw rateLimitErr;
});
const agent = new AgentLoop({ const received: Array<any> = [];
model: "any",
instructions: "",
approvalPolicy: { mode: "auto" } as any,
onItem: (i) => received.push(i),
onLoading: () => {},
getCommandConfirmation: async () => ({ review: "yes" } as any),
onLastResponseId: () => {},
});
const userMsg = [ const agent = new AgentLoop({
{ model: "any",
type: "message", instructions: "",
role: "user", approvalPolicy: { mode: "auto" } as any,
content: [{ type: "input_text", text: "hello" }], onItem: (i) => received.push(i),
}, onLoading: () => {},
]; getCommandConfirmation: async () => ({ review: "yes" } as any),
onLastResponseId: () => {},
});
// The desired behaviour (not yet implemented): AgentLoop should catch the const userMsg = [
// rate-limit error, emit a helpful system message and resolve without {
// throwing so callers can let the user retry. type: "message",
await expect(agent.run(userMsg as any)).resolves.not.toThrow(); role: "user",
content: [{ type: "input_text", text: "hello" }],
},
];
// Let flush timers run. // Start the run but don't await yet so we can advance fake timers while it
await new Promise((r) => setTimeout(r, 20)); // is in progress.
const runPromise = agent.run(userMsg as any);
const sysMsg = received.find( // The agent waits 15,000 ms between retries (rate-limit backoff) and does
(i) => // this four times (after attempts 1-4). Fast-forward a bit more to cover
i.role === "system" && // any additional small `setTimeout` calls inside the implementation.
typeof i.content?.[0]?.text === "string" && await vi.advanceTimersByTimeAsync(61_000); // 4 * 15s + 1s safety margin
i.content[0].text.includes("Rate limit"),
);
expect(sysMsg).toBeTruthy(); // Ensure the promise settles without throwing.
await expect(runPromise).resolves.not.toThrow();
// Flush the 10 ms staging delay used when emitting items.
await vi.advanceTimersByTimeAsync(20);
// The OpenAI client should have been called the maximum number of retry
// attempts (5).
expect(openAiState.createSpy).toHaveBeenCalledTimes(5);
// Finally, verify that the user sees a helpful system message.
const sysMsg = received.find(
(i) =>
i.role === "system" &&
typeof i.content?.[0]?.text === "string" &&
i.content[0].text.includes("Rate limit reached"),
);
expect(sysMsg).toBeTruthy();
} finally {
// Ensure global timer state is restored for subsequent tests.
vi.useRealTimers();
}
}); });
}); });

View File

@@ -152,7 +152,7 @@ describe("AgentLoop automatic retry on 5xx errors", () => {
await new Promise((r) => setTimeout(r, 20)); await new Promise((r) => setTimeout(r, 20));
expect(openAiState.createSpy).toHaveBeenCalledTimes(3); expect(openAiState.createSpy).toHaveBeenCalledTimes(5);
const sysMsg = received.find( const sysMsg = received.find(
(i) => (i) =>