From 47c683480f5fe1e1bd98f516f25845215e89dd25 Mon Sep 17 00:00:00 2001 From: Thibault Sottiaux Date: Wed, 16 Apr 2025 17:37:12 -0700 Subject: [PATCH] (feat) exponential back-off when encountering rate limit errors (#153) ...and try to parse the suggested time from the error message while we don't yet have this in a structured way --------- Signed-off-by: Thibault Sottiaux --- codex-cli/package-lock.json | 4 +- codex-cli/src/utils/agent/agent-loop.ts | 72 +++++++++++++++++-------- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/codex-cli/package-lock.json b/codex-cli/package-lock.json index eb398389..241a96a5 100644 --- a/codex-cli/package-lock.json +++ b/codex-cli/package-lock.json @@ -1,12 +1,12 @@ { "name": "@openai/codex", - "version": "0.1.04161241", + "version": "0.1.2504161510", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@openai/codex", - "version": "0.1.04161241", + "version": "0.1.2504161510", "license": "Apache-2.0", "dependencies": { "@inkjs/ui": "^2.0.0", diff --git a/codex-cli/src/utils/agent/agent-loop.ts b/codex-cli/src/utils/agent/agent-loop.ts index d88604c4..f2048fc0 100644 --- a/codex-cli/src/utils/agent/agent-loop.ts +++ b/codex-cli/src/utils/agent/agent-loop.ts @@ -24,7 +24,7 @@ import OpenAI, { APIConnectionTimeoutError } from "openai"; // Wait time before retrying after rate limit errors (ms). const RATE_LIMIT_RETRY_WAIT_MS = parseInt( - process.env["OPENAI_RATE_LIMIT_RETRY_WAIT_MS"] || "15000", + process.env["OPENAI_RATE_LIMIT_RETRY_WAIT_MS"] || "2500", 10, ); @@ -569,11 +569,6 @@ export class AgentLoop { ); continue; } - const isRateLimit = - status === 429 || - errCtx.code === "rate_limit_exceeded" || - errCtx.type === "rate_limit_exceeded" || - /rate limit/i.test(errCtx.message ?? 
""); const isTooManyTokensError = (errCtx.param === "max_tokens" || @@ -597,30 +592,61 @@ export class AgentLoop { return; } + const isRateLimit = + status === 429 || + errCtx.code === "rate_limit_exceeded" || + errCtx.type === "rate_limit_exceeded" || + /rate limit/i.test(errCtx.message ?? ""); if (isRateLimit) { if (attempt < MAX_RETRIES) { + // Exponential backoff: base wait * 2^(attempt-1), or use suggested retry time + // if provided. + let delayMs = RATE_LIMIT_RETRY_WAIT_MS * 2 ** (attempt - 1); + + // Parse suggested retry time from error message, e.g., "Please try again in 1.3s" + const msg = errCtx?.message ?? ""; + const m = /retry again in ([\d.]+)s/i.exec(msg); + if (m && m[1]) { + const suggested = parseFloat(m[1]) * 1000; + if (!Number.isNaN(suggested)) { + delayMs = suggested; + } + } log( - `OpenAI rate limit exceeded (attempt ${attempt}/${MAX_RETRIES}), retrying in ${RATE_LIMIT_RETRY_WAIT_MS} ms...`, + `OpenAI rate limit exceeded (attempt ${attempt}/${MAX_RETRIES}), retrying in ${Math.round( + delayMs, + )} ms...`, ); // eslint-disable-next-line no-await-in-loop - await new Promise((resolve) => - setTimeout(resolve, RATE_LIMIT_RETRY_WAIT_MS), - ); + await new Promise((resolve) => setTimeout(resolve, delayMs)); continue; + } else { + // We have exhausted all retry attempts. Surface a message so the user understands + // why the request failed and can decide how to proceed (e.g. wait and retry later + // or switch to a different model / account). + + const errorDetails = [ + `Status: ${status || "unknown"}`, + `Code: ${errCtx.code || "unknown"}`, + `Type: ${errCtx.type || "unknown"}`, + `Message: ${errCtx.message || "unknown"}`, + ].join(", "); + + this.onItem({ + id: `error-${Date.now()}`, + type: "message", + role: "system", + content: [ + { + type: "input_text", + text: `⚠️ Rate limit reached. Error details: ${errorDetails}. 
Please try again later.`, + }, + ], + }); + + this.onLoading(false); + return; } - this.onItem({ - id: `error-${Date.now()}`, - type: "message", - role: "system", - content: [ - { - type: "input_text", - text: "⚠️ Rate limit reached while contacting OpenAI. Please try again later.", - }, - ], - }); - this.onLoading(false); - return; } const isClientError =