From d09dbba7ec28218c9ca680f02ab7eb9309ca8bb9 Mon Sep 17 00:00:00 2001
From: Thibault Sottiaux <tibo@openai.com>
Date: Mon, 28 Apr 2025 21:11:30 -0700
Subject: [PATCH] feat: lower default retry wait time and increase number of
 tries (#720)

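The default wait before retrying after a rate-limit error is lowered from
2500 ms to 500 ms, and the maximum number of attempts is raised from 5 to 8.
In total we now guarantee that we will wait for at least 60s before giving up.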

---------

Signed-off-by: Thibault Sottiaux <tibo@openai.com>
---
 codex-cli/src/utils/agent/agent-loop.ts        |  4 ++--
 codex-cli/tests/agent-rate-limit-error.test.ts | 10 ++++------
 codex-cli/tests/agent-server-retry.test.ts     |  4 ++--
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/codex-cli/src/utils/agent/agent-loop.ts b/codex-cli/src/utils/agent/agent-loop.ts
index 5fca0016..3ed8c9f1 100644
--- a/codex-cli/src/utils/agent/agent-loop.ts
+++ b/codex-cli/src/utils/agent/agent-loop.ts
@@ -34,7 +34,7 @@ import OpenAI, { APIConnectionTimeoutError } from "openai";
 
 // Wait time before retrying after rate limit errors (ms).
 const RATE_LIMIT_RETRY_WAIT_MS = parseInt(
-  process.env["OPENAI_RATE_LIMIT_RETRY_WAIT_MS"] || "2500",
+  process.env["OPENAI_RATE_LIMIT_RETRY_WAIT_MS"] || "500",
   10,
 );
 
@@ -671,7 +671,7 @@ export class AgentLoop {
         let stream;
 
         // Retry loop for transient errors. Up to MAX_RETRIES attempts.
-        const MAX_RETRIES = 5;
+        const MAX_RETRIES = 8;
         for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
           try {
             let reasoning: Reasoning | undefined;
diff --git a/codex-cli/tests/agent-rate-limit-error.test.ts b/codex-cli/tests/agent-rate-limit-error.test.ts
index 086ef64a..4579293e 100644
--- a/codex-cli/tests/agent-rate-limit-error.test.ts
+++ b/codex-cli/tests/agent-rate-limit-error.test.ts
@@ -98,10 +98,8 @@ describe("AgentLoop – rate‑limit handling", () => {
       // is in progress.
       const runPromise = agent.run(userMsg as any);
 
-      // The agent waits 15 000 ms between retries (rate‑limit back‑off) and does
-      // this four times (after attempts 1‑4). Fast‑forward a bit more to cover
-      // any additional small `setTimeout` calls inside the implementation.
-      await vi.advanceTimersByTimeAsync(61_000); // 4 * 15s + 1s safety margin
+      // The run should finish all of its retries within 180 seconds of timer time.
+      await vi.advanceTimersByTimeAsync(180_000);
 
       // Ensure the promise settles without throwing.
       await expect(runPromise).resolves.not.toThrow();
@@ -110,8 +108,8 @@ describe("AgentLoop – rate‑limit handling", () => {
       await vi.advanceTimersByTimeAsync(20);
 
       // The OpenAI client should have been called the maximum number of retry
-      // attempts (5).
-      expect(openAiState.createSpy).toHaveBeenCalledTimes(5);
+      // attempts (8).
+      expect(openAiState.createSpy).toHaveBeenCalledTimes(8);
 
       // Finally, verify that the user sees a helpful system message.
       const sysMsg = received.find(
diff --git a/codex-cli/tests/agent-server-retry.test.ts b/codex-cli/tests/agent-server-retry.test.ts
index a9cc5f45..06762a1d 100644
--- a/codex-cli/tests/agent-server-retry.test.ts
+++ b/codex-cli/tests/agent-server-retry.test.ts
@@ -122,7 +122,7 @@ describe("AgentLoop – automatic retry on 5xx errors", () => {
     expect(assistant?.content?.[0]?.text).toBe("ok");
   });
 
-  it("fails after 3 attempts and surfaces system message", async () => {
+  it("fails after a few attempts and surfaces system message", async () => {
     openAiState.createSpy = vi.fn(async () => {
       const err: any = new Error("Internal Server Error");
       err.status = 502; // any 5xx
@@ -154,7 +154,7 @@ describe("AgentLoop – automatic retry on 5xx errors", () => {
 
     await new Promise((r) => setTimeout(r, 20));
 
-    expect(openAiState.createSpy).toHaveBeenCalledTimes(5);
+    expect(openAiState.createSpy).toHaveBeenCalledTimes(8);
 
     const sysMsg = received.find(
       (i) =>