Don't retry "insufficient_quota" errors (#6340)

This PR makes an "insufficient quota" error fatal so we don't attempt to
retry it multiple times in the agent loop.

We have multiple bug reports from users about intermittent retry
behaviors, and this could explain some of them. With this change, we'll
eliminate the retries and surface a clear error message.

The PR is a nearly identical copy of [this
PR](https://github.com/openai/codex/pull/4837) contributed by
@abimaelmartell. The original PR has gone stale. Rather than wait for
the contributor to resolve merge conflicts, I wanted to get this change
in.
This commit is contained in:
Eric Traut
2025-11-06 17:12:01 -06:00
committed by GitHub
parent e30f65118d
commit 0c647bc566
5 changed files with 123 additions and 0 deletions

View File

@@ -447,6 +447,8 @@ impl ModelClient {
return Err(StreamAttemptError::Fatal(codex_err));
} else if error.r#type.as_deref() == Some("usage_not_included") {
return Err(StreamAttemptError::Fatal(CodexErr::UsageNotIncluded));
} else if is_quota_exceeded_error(&error) {
return Err(StreamAttemptError::Fatal(CodexErr::QuotaExceeded));
}
}
}
@@ -844,6 +846,8 @@ async fn process_sse<S>(
Ok(error) => {
if is_context_window_error(&error) {
response_error = Some(CodexErr::ContextWindowExceeded);
} else if is_quota_exceeded_error(&error) {
response_error = Some(CodexErr::QuotaExceeded);
} else {
let delay = try_parse_retry_after(&error);
let message = error.message.clone().unwrap_or_default();
@@ -975,6 +979,10 @@ fn is_context_window_error(error: &Error) -> bool {
error.code.as_deref() == Some("context_length_exceeded")
}
fn is_quota_exceeded_error(error: &Error) -> bool {
error.code.as_deref() == Some("insufficient_quota")
}
#[cfg(test)]
mod tests {
use super::*;
@@ -1307,6 +1315,41 @@ mod tests {
}
}
#[tokio::test]
async fn quota_exceeded_error_is_fatal() {
let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_fatal_quota","object":"response","created_at":1759771626,"status":"failed","background":false,"error":{"code":"insufficient_quota","message":"You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors."},"incomplete_details":null}}"#;
let sse1 = format!("event: response.failed\ndata: {raw_error}\n\n");
let provider = ModelProviderInfo {
name: "test".to_string(),
base_url: Some("https://test.com".to_string()),
env_key: Some("TEST_API_KEY".to_string()),
env_key_instructions: None,
experimental_bearer_token: None,
wire_api: WireApi::Responses,
query_params: None,
http_headers: None,
env_http_headers: None,
request_max_retries: Some(0),
stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(1000),
requires_openai_auth: false,
};
let otel_event_manager = otel_event_manager();
let events = collect_events(&[sse1.as_bytes()], provider, otel_event_manager).await;
assert_eq!(events.len(), 1);
match &events[0] {
Err(err @ CodexErr::QuotaExceeded) => {
assert_eq!(err.to_string(), CodexErr::QuotaExceeded.to_string());
}
other => panic!("unexpected quota exceeded event: {other:?}"),
}
}
// ────────────────────────────
// Table-driven test from `main`
// ────────────────────────────