Wait for requested delay in rate limit errors (#2266)
Fixes: https://github.com/openai/codex/issues/2131 Response doesn't have the delay in a separate field (yet) so parse the message.
This commit is contained in:
@@ -29,6 +29,7 @@ mcp-types = { path = "../mcp-types" }
|
|||||||
mime_guess = "2.0"
|
mime_guess = "2.0"
|
||||||
os_info = "3.12.0"
|
os_info = "3.12.0"
|
||||||
rand = "0.9"
|
rand = "0.9"
|
||||||
|
regex-lite = "0.1.6"
|
||||||
reqwest = { version = "0.12", features = ["json", "stream"] }
|
reqwest = { version = "0.12", features = ["json", "stream"] }
|
||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
serde_bytes = "0.11"
|
serde_bytes = "0.11"
|
||||||
@@ -76,7 +77,6 @@ core_test_support = { path = "tests/common" }
|
|||||||
maplit = "1.0.2"
|
maplit = "1.0.2"
|
||||||
predicates = "3"
|
predicates = "3"
|
||||||
pretty_assertions = "1.4.1"
|
pretty_assertions = "1.4.1"
|
||||||
regex-lite = "0.1.6"
|
|
||||||
tempfile = "3"
|
tempfile = "3"
|
||||||
tokio-test = "0.4"
|
tokio-test = "0.4"
|
||||||
walkdir = "2.5.0"
|
walkdir = "2.5.0"
|
||||||
|
|||||||
@@ -213,7 +213,9 @@ async fn process_chat_sse<S>(
|
|||||||
let sse = match timeout(idle_timeout, stream.next()).await {
|
let sse = match timeout(idle_timeout, stream.next()).await {
|
||||||
Ok(Some(Ok(ev))) => ev,
|
Ok(Some(Ok(ev))) => ev,
|
||||||
Ok(Some(Err(e))) => {
|
Ok(Some(Err(e))) => {
|
||||||
let _ = tx_event.send(Err(CodexErr::Stream(e.to_string()))).await;
|
let _ = tx_event
|
||||||
|
.send(Err(CodexErr::Stream(e.to_string(), None)))
|
||||||
|
.await;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Ok(None) => {
|
Ok(None) => {
|
||||||
@@ -228,7 +230,10 @@ async fn process_chat_sse<S>(
|
|||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
let _ = tx_event
|
let _ = tx_event
|
||||||
.send(Err(CodexErr::Stream("idle timeout waiting for SSE".into())))
|
.send(Err(CodexErr::Stream(
|
||||||
|
"idle timeout waiting for SSE".into(),
|
||||||
|
None,
|
||||||
|
)))
|
||||||
.await;
|
.await;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use std::io::BufRead;
|
use std::io::BufRead;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
use std::sync::OnceLock;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use bytes::Bytes;
|
use bytes::Bytes;
|
||||||
@@ -7,6 +8,7 @@ use codex_login::AuthMode;
|
|||||||
use codex_login::CodexAuth;
|
use codex_login::CodexAuth;
|
||||||
use eventsource_stream::Eventsource;
|
use eventsource_stream::Eventsource;
|
||||||
use futures::prelude::*;
|
use futures::prelude::*;
|
||||||
|
use regex_lite::Regex;
|
||||||
use reqwest::StatusCode;
|
use reqwest::StatusCode;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
@@ -49,7 +51,9 @@ struct ErrorResponse {
|
|||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
struct Error {
|
struct Error {
|
||||||
r#type: String,
|
r#type: Option<String>,
|
||||||
|
code: Option<String>,
|
||||||
|
message: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
@@ -263,14 +267,18 @@ impl ModelClient {
|
|||||||
if status == StatusCode::TOO_MANY_REQUESTS {
|
if status == StatusCode::TOO_MANY_REQUESTS {
|
||||||
let body = res.json::<ErrorResponse>().await.ok();
|
let body = res.json::<ErrorResponse>().await.ok();
|
||||||
if let Some(ErrorResponse {
|
if let Some(ErrorResponse {
|
||||||
error: Error { r#type, .. },
|
error:
|
||||||
|
Error {
|
||||||
|
r#type: Some(error_type),
|
||||||
|
..
|
||||||
|
},
|
||||||
}) = body
|
}) = body
|
||||||
{
|
{
|
||||||
if r#type == "usage_limit_reached" {
|
if error_type == "usage_limit_reached" {
|
||||||
return Err(CodexErr::UsageLimitReached(UsageLimitReachedError {
|
return Err(CodexErr::UsageLimitReached(UsageLimitReachedError {
|
||||||
plan_type: auth.and_then(|a| a.get_plan_type()),
|
plan_type: auth.and_then(|a| a.get_plan_type()),
|
||||||
}));
|
}));
|
||||||
} else if r#type == "usage_not_included" {
|
} else if error_type == "usage_not_included" {
|
||||||
return Err(CodexErr::UsageNotIncluded);
|
return Err(CodexErr::UsageNotIncluded);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -366,13 +374,14 @@ async fn process_sse<S>(
|
|||||||
// If the stream stays completely silent for an extended period treat it as disconnected.
|
// If the stream stays completely silent for an extended period treat it as disconnected.
|
||||||
// The response id returned from the "complete" message.
|
// The response id returned from the "complete" message.
|
||||||
let mut response_completed: Option<ResponseCompleted> = None;
|
let mut response_completed: Option<ResponseCompleted> = None;
|
||||||
|
let mut response_error: Option<CodexErr> = None;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let sse = match timeout(idle_timeout, stream.next()).await {
|
let sse = match timeout(idle_timeout, stream.next()).await {
|
||||||
Ok(Some(Ok(sse))) => sse,
|
Ok(Some(Ok(sse))) => sse,
|
||||||
Ok(Some(Err(e))) => {
|
Ok(Some(Err(e))) => {
|
||||||
debug!("SSE Error: {e:#}");
|
debug!("SSE Error: {e:#}");
|
||||||
let event = CodexErr::Stream(e.to_string());
|
let event = CodexErr::Stream(e.to_string(), None);
|
||||||
let _ = tx_event.send(Err(event)).await;
|
let _ = tx_event.send(Err(event)).await;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -390,9 +399,10 @@ async fn process_sse<S>(
|
|||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
let _ = tx_event
|
let _ = tx_event
|
||||||
.send(Err(CodexErr::Stream(
|
.send(Err(response_error.unwrap_or(CodexErr::Stream(
|
||||||
"stream closed before response.completed".into(),
|
"stream closed before response.completed".into(),
|
||||||
)))
|
None,
|
||||||
|
))))
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -400,7 +410,10 @@ async fn process_sse<S>(
|
|||||||
}
|
}
|
||||||
Err(_) => {
|
Err(_) => {
|
||||||
let _ = tx_event
|
let _ = tx_event
|
||||||
.send(Err(CodexErr::Stream("idle timeout waiting for SSE".into())))
|
.send(Err(CodexErr::Stream(
|
||||||
|
"idle timeout waiting for SSE".into(),
|
||||||
|
None,
|
||||||
|
)))
|
||||||
.await;
|
.await;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -478,15 +491,25 @@ async fn process_sse<S>(
|
|||||||
}
|
}
|
||||||
"response.failed" => {
|
"response.failed" => {
|
||||||
if let Some(resp_val) = event.response {
|
if let Some(resp_val) = event.response {
|
||||||
let error = resp_val
|
response_error = Some(CodexErr::Stream(
|
||||||
.get("error")
|
"response.failed event received".to_string(),
|
||||||
.and_then(|v| v.get("message"))
|
None,
|
||||||
.and_then(|v| v.as_str())
|
));
|
||||||
.unwrap_or("response.failed event received");
|
|
||||||
|
|
||||||
let _ = tx_event
|
let error = resp_val.get("error");
|
||||||
.send(Err(CodexErr::Stream(error.to_string())))
|
|
||||||
.await;
|
if let Some(error) = error {
|
||||||
|
match serde_json::from_value::<Error>(error.clone()) {
|
||||||
|
Ok(error) => {
|
||||||
|
let delay = try_parse_retry_after(&error);
|
||||||
|
let message = error.message.unwrap_or_default();
|
||||||
|
response_error = Some(CodexErr::Stream(message, delay));
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
debug!("failed to parse ErrorResponse: {e}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Final response completed – includes array of output items & id
|
// Final response completed – includes array of output items & id
|
||||||
@@ -550,6 +573,40 @@ async fn stream_from_fixture(
|
|||||||
Ok(ResponseStream { rx_event })
|
Ok(ResponseStream { rx_event })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn rate_limit_regex() -> &'static Regex {
|
||||||
|
static RE: OnceLock<Regex> = OnceLock::new();
|
||||||
|
|
||||||
|
#[expect(clippy::unwrap_used)]
|
||||||
|
RE.get_or_init(|| Regex::new(r"Please try again in (\d+(?:\.\d+)?)(s|ms)").unwrap())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn try_parse_retry_after(err: &Error) -> Option<Duration> {
|
||||||
|
if err.code != Some("rate_limit_exceeded".to_string()) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
// parse the Please try again in 1.898s format using regex
|
||||||
|
let re = rate_limit_regex();
|
||||||
|
if let Some(message) = &err.message
|
||||||
|
&& let Some(captures) = re.captures(message)
|
||||||
|
{
|
||||||
|
let seconds = captures.get(1);
|
||||||
|
let unit = captures.get(2);
|
||||||
|
|
||||||
|
if let (Some(value), Some(unit)) = (seconds, unit) {
|
||||||
|
let value = value.as_str().parse::<f64>().ok()?;
|
||||||
|
let unit = unit.as_str();
|
||||||
|
|
||||||
|
if unit == "s" {
|
||||||
|
return Some(Duration::from_secs_f64(value));
|
||||||
|
} else if unit == "ms" {
|
||||||
|
return Some(Duration::from_millis(value as u64));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||||
@@ -735,13 +792,49 @@ mod tests {
|
|||||||
matches!(events[0], Ok(ResponseEvent::OutputItemDone(_)));
|
matches!(events[0], Ok(ResponseEvent::OutputItemDone(_)));
|
||||||
|
|
||||||
match &events[1] {
|
match &events[1] {
|
||||||
Err(CodexErr::Stream(msg)) => {
|
Err(CodexErr::Stream(msg, _)) => {
|
||||||
assert_eq!(msg, "stream closed before response.completed")
|
assert_eq!(msg, "stream closed before response.completed")
|
||||||
}
|
}
|
||||||
other => panic!("unexpected second event: {other:?}"),
|
other => panic!("unexpected second event: {other:?}"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn error_when_error_event() {
|
||||||
|
let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_689bcf18d7f08194bf3440ba62fe05d803fee0cdac429894","object":"response","created_at":1755041560,"status":"failed","background":false,"error":{"code":"rate_limit_exceeded","message":"Rate limit reached for gpt-5 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."}, "usage":null,"user":null,"metadata":{}}}"#;
|
||||||
|
|
||||||
|
let sse1 = format!("event: response.failed\ndata: {raw_error}\n\n");
|
||||||
|
let provider = ModelProviderInfo {
|
||||||
|
name: "test".to_string(),
|
||||||
|
base_url: Some("https://test.com".to_string()),
|
||||||
|
env_key: Some("TEST_API_KEY".to_string()),
|
||||||
|
env_key_instructions: None,
|
||||||
|
wire_api: WireApi::Responses,
|
||||||
|
query_params: None,
|
||||||
|
http_headers: None,
|
||||||
|
env_http_headers: None,
|
||||||
|
request_max_retries: Some(0),
|
||||||
|
stream_max_retries: Some(0),
|
||||||
|
stream_idle_timeout_ms: Some(1000),
|
||||||
|
requires_openai_auth: false,
|
||||||
|
};
|
||||||
|
|
||||||
|
let events = collect_events(&[sse1.as_bytes()], provider).await;
|
||||||
|
|
||||||
|
assert_eq!(events.len(), 1);
|
||||||
|
|
||||||
|
match &events[0] {
|
||||||
|
Err(CodexErr::Stream(msg, delay)) => {
|
||||||
|
assert_eq!(
|
||||||
|
msg,
|
||||||
|
"Rate limit reached for gpt-5 in organization org-AAA on tokens per min (TPM): Limit 30000, Used 22999, Requested 12528. Please try again in 11.054s. Visit https://platform.openai.com/account/rate-limits to learn more."
|
||||||
|
);
|
||||||
|
assert_eq!(*delay, Some(Duration::from_secs_f64(11.054)));
|
||||||
|
}
|
||||||
|
other => panic!("unexpected second event: {other:?}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ────────────────────────────
|
// ────────────────────────────
|
||||||
// Table-driven test from `main`
|
// Table-driven test from `main`
|
||||||
// ────────────────────────────
|
// ────────────────────────────
|
||||||
@@ -840,4 +933,27 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_try_parse_retry_after() {
|
||||||
|
let err = Error {
|
||||||
|
r#type: None,
|
||||||
|
message: Some("Rate limit reached for gpt-5 in organization org- on tokens per min (TPM): Limit 1, Used 1, Requested 19304. Please try again in 28ms. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
|
||||||
|
code: Some("rate_limit_exceeded".to_string()),
|
||||||
|
};
|
||||||
|
|
||||||
|
let delay = try_parse_retry_after(&err);
|
||||||
|
assert_eq!(delay, Some(Duration::from_millis(28)));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_try_parse_retry_after_no_delay() {
|
||||||
|
let err = Error {
|
||||||
|
r#type: None,
|
||||||
|
message: Some("Rate limit reached for gpt-5 in organization <ORG> on tokens per min (TPM): Limit 30000, Used 6899, Requested 24050. Please try again in 1.898s. Visit https://platform.openai.com/account/rate-limits to learn more.".to_string()),
|
||||||
|
code: Some("rate_limit_exceeded".to_string()),
|
||||||
|
};
|
||||||
|
let delay = try_parse_retry_after(&err);
|
||||||
|
assert_eq!(delay, Some(Duration::from_secs_f64(1.898)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1281,7 +1281,10 @@ async fn run_turn(
|
|||||||
let max_retries = sess.client.get_provider().stream_max_retries();
|
let max_retries = sess.client.get_provider().stream_max_retries();
|
||||||
if retries < max_retries {
|
if retries < max_retries {
|
||||||
retries += 1;
|
retries += 1;
|
||||||
let delay = backoff(retries);
|
let delay = match e {
|
||||||
|
CodexErr::Stream(_, Some(delay)) => delay,
|
||||||
|
_ => backoff(retries),
|
||||||
|
};
|
||||||
warn!(
|
warn!(
|
||||||
"stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",
|
"stream disconnected - retrying turn ({retries}/{max_retries} in {delay:?})...",
|
||||||
);
|
);
|
||||||
@@ -1391,6 +1394,7 @@ async fn try_run_turn(
|
|||||||
// Treat as a disconnected stream so the caller can retry.
|
// Treat as a disconnected stream so the caller can retry.
|
||||||
return Err(CodexErr::Stream(
|
return Err(CodexErr::Stream(
|
||||||
"stream closed before response.completed".into(),
|
"stream closed before response.completed".into(),
|
||||||
|
None,
|
||||||
));
|
));
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -2201,6 +2205,7 @@ async fn drain_to_completed(sess: &Session, sub_id: &str, prompt: &Prompt) -> Co
|
|||||||
let Some(event) = maybe_event else {
|
let Some(event) = maybe_event else {
|
||||||
return Err(CodexErr::Stream(
|
return Err(CodexErr::Stream(
|
||||||
"stream closed before response.completed".into(),
|
"stream closed before response.completed".into(),
|
||||||
|
None,
|
||||||
));
|
));
|
||||||
};
|
};
|
||||||
match event {
|
match event {
|
||||||
@@ -2218,6 +2223,7 @@ async fn drain_to_completed(sess: &Session, sub_id: &str, prompt: &Prompt) -> Co
|
|||||||
None => {
|
None => {
|
||||||
return Err(CodexErr::Stream(
|
return Err(CodexErr::Stream(
|
||||||
"token_usage was None in ResponseEvent::Completed".into(),
|
"token_usage was None in ResponseEvent::Completed".into(),
|
||||||
|
None,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
use reqwest::StatusCode;
|
use reqwest::StatusCode;
|
||||||
use serde_json;
|
use serde_json;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
use std::time::Duration;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use tokio::task::JoinError;
|
use tokio::task::JoinError;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
@@ -42,8 +43,10 @@ pub enum CodexErr {
|
|||||||
/// handshake has succeeded but **before** it finished emitting `response.completed`.
|
/// handshake has succeeded but **before** it finished emitting `response.completed`.
|
||||||
///
|
///
|
||||||
/// The Session loop treats this as a transient error and will automatically retry the turn.
|
/// The Session loop treats this as a transient error and will automatically retry the turn.
|
||||||
|
///
|
||||||
|
/// Optionally includes the requested delay before retrying the turn.
|
||||||
#[error("stream disconnected before completion: {0}")]
|
#[error("stream disconnected before completion: {0}")]
|
||||||
Stream(String),
|
Stream(String, Option<Duration>),
|
||||||
|
|
||||||
#[error("no conversation with id: {0}")]
|
#[error("no conversation with id: {0}")]
|
||||||
ConversationNotFound(Uuid),
|
ConversationNotFound(Uuid),
|
||||||
|
|||||||
Reference in New Issue
Block a user