Don't retry "insufficient_quota" errors (#6340)
This PR makes an "insufficient quota" error fatal so we don't attempt to retry it multiple times in the agent loop. We have multiple bug reports from users about intermittent retry behaviors, and this could explain some of them. With this change, we'll eliminate the retries and surface a clear error message. The PR is a nearly identical copy of [this PR](https://github.com/openai/codex/pull/4837) contributed by @abimaelmartell. The original PR has gone stale. Rather than wait for the contributor to resolve merge conflicts, I wanted to get this change in.
This commit is contained in:
@@ -447,6 +447,8 @@ impl ModelClient {
|
||||
return Err(StreamAttemptError::Fatal(codex_err));
|
||||
} else if error.r#type.as_deref() == Some("usage_not_included") {
|
||||
return Err(StreamAttemptError::Fatal(CodexErr::UsageNotIncluded));
|
||||
} else if is_quota_exceeded_error(&error) {
|
||||
return Err(StreamAttemptError::Fatal(CodexErr::QuotaExceeded));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -844,6 +846,8 @@ async fn process_sse<S>(
|
||||
Ok(error) => {
|
||||
if is_context_window_error(&error) {
|
||||
response_error = Some(CodexErr::ContextWindowExceeded);
|
||||
} else if is_quota_exceeded_error(&error) {
|
||||
response_error = Some(CodexErr::QuotaExceeded);
|
||||
} else {
|
||||
let delay = try_parse_retry_after(&error);
|
||||
let message = error.message.clone().unwrap_or_default();
|
||||
@@ -975,6 +979,10 @@ fn is_context_window_error(error: &Error) -> bool {
|
||||
error.code.as_deref() == Some("context_length_exceeded")
|
||||
}
|
||||
|
||||
fn is_quota_exceeded_error(error: &Error) -> bool {
|
||||
error.code.as_deref() == Some("insufficient_quota")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -1307,6 +1315,41 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn quota_exceeded_error_is_fatal() {
|
||||
let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_fatal_quota","object":"response","created_at":1759771626,"status":"failed","background":false,"error":{"code":"insufficient_quota","message":"You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors."},"incomplete_details":null}}"#;
|
||||
|
||||
let sse1 = format!("event: response.failed\ndata: {raw_error}\n\n");
|
||||
let provider = ModelProviderInfo {
|
||||
name: "test".to_string(),
|
||||
base_url: Some("https://test.com".to_string()),
|
||||
env_key: Some("TEST_API_KEY".to_string()),
|
||||
env_key_instructions: None,
|
||||
experimental_bearer_token: None,
|
||||
wire_api: WireApi::Responses,
|
||||
query_params: None,
|
||||
http_headers: None,
|
||||
env_http_headers: None,
|
||||
request_max_retries: Some(0),
|
||||
stream_max_retries: Some(0),
|
||||
stream_idle_timeout_ms: Some(1000),
|
||||
requires_openai_auth: false,
|
||||
};
|
||||
|
||||
let otel_event_manager = otel_event_manager();
|
||||
|
||||
let events = collect_events(&[sse1.as_bytes()], provider, otel_event_manager).await;
|
||||
|
||||
assert_eq!(events.len(), 1);
|
||||
|
||||
match &events[0] {
|
||||
Err(err @ CodexErr::QuotaExceeded) => {
|
||||
assert_eq!(err.to_string(), CodexErr::QuotaExceeded.to_string());
|
||||
}
|
||||
other => panic!("unexpected quota exceeded event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
// ────────────────────────────
|
||||
// Table-driven test from `main`
|
||||
// ────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user