feat: lower default retry wait time and increase number of tries (#720)

In total we now guarantee that we will wait for at least 60s before giving up.

---------

Signed-off-by: Thibault Sottiaux <tibo@openai.com>
2025-04-28 21:11:30 -07:00
parent e79549f039
commit d09dbba7ec
3 changed files with 8 additions and 10 deletions
--- a/codex-cli/src/utils/agent/agent-loop.ts
+++ b/codex-cli/src/utils/agent/agent-loop.ts
@@ -34,7 +34,7 @@ import OpenAI, { APIConnectionTimeoutError } from "openai";
 // Wait time before retrying after rate limit errors (ms).
 const RATE_LIMIT_RETRY_WAIT_MS = parseInt(
-  process.env["OPENAI_RATE_LIMIT_RETRY_WAIT_MS"] || "2500",
+  process.env["OPENAI_RATE_LIMIT_RETRY_WAIT_MS"] || "500",
  10,
 );
@@ -671,7 +671,7 @@ export class AgentLoop {
        let stream;
        // Retry loop for transient errors. Up to MAX_RETRIES attempts.
-        const MAX_RETRIES = 5;
+        const MAX_RETRIES = 8;
        for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
          try {
            let reasoning: Reasoning | undefined;
--- a/codex-cli/tests/agent-rate-limit-error.test.ts
+++ b/codex-cli/tests/agent-rate-limit-error.test.ts
@@ -98,10 +98,8 @@ describe("AgentLoop – rate‑limit handling", () => {
      // is in progress.
      const runPromise = agent.run(userMsg as any);
-      // The agent waits 15 000 ms between retries (rate‑limit back‑off) and does
+      // Should be done in at most 180 seconds.
-      // this four times (after attempts 1‑4). Fast‑forward a bit more to cover
+      await vi.advanceTimersByTimeAsync(180_000);
-      // any additional small `setTimeout` calls inside the implementation.
-      await vi.advanceTimersByTimeAsync(61_000); // 4 * 15s + 1s safety margin
      // Ensure the promise settles without throwing.
      await expect(runPromise).resolves.not.toThrow();
@@ -110,8 +108,8 @@ describe("AgentLoop – rate‑limit handling", () => {
      await vi.advanceTimersByTimeAsync(20);
      // The OpenAI client should have been called the maximum number of retry
-      // attempts (5).
+      // attempts (8).
-      expect(openAiState.createSpy).toHaveBeenCalledTimes(5);
+      expect(openAiState.createSpy).toHaveBeenCalledTimes(8);
      // Finally, verify that the user sees a helpful system message.
      const sysMsg = received.find(
--- a/codex-cli/tests/agent-server-retry.test.ts
+++ b/codex-cli/tests/agent-server-retry.test.ts
@@ -122,7 +122,7 @@ describe("AgentLoop – automatic retry on 5xx errors", () => {
    expect(assistant?.content?.[0]?.text).toBe("ok");
  });
-  it("fails after 3 attempts and surfaces system message", async () => {
+  it("fails after a few attempts and surfaces system message", async () => {
    openAiState.createSpy = vi.fn(async () => {
      const err: any = new Error("Internal Server Error");
      err.status = 502; // any 5xx
@@ -154,7 +154,7 @@ describe("AgentLoop – automatic retry on 5xx errors", () => {
    await new Promise((r) => setTimeout(r, 20));
-    expect(openAiState.createSpy).toHaveBeenCalledTimes(5);
+    expect(openAiState.createSpy).toHaveBeenCalledTimes(8);
    const sysMsg = received.find(
      (i) =>