Surface context window error to the client (#4675)

In the past, we were treating `input exceeded context window` as a
streaming error and retrying on it. Retrying on it has no point because
it won't change the behavior. In this PR, we surface the error to the
client without retry and also send a token count event to indicate that
the context window is full.

<img width="650" height="125" alt="image"
src="https://github.com/user-attachments/assets/c26b1213-4c27-4bfc-90f4-51a270a3efd5"
/>
This commit is contained in:
Ahmed Ibrahim
2025-10-04 18:40:06 -07:00
committed by GitHub
parent 6c2969d22d
commit 90ef94d3b3
8 changed files with 254 additions and 4 deletions

View File

@@ -63,7 +63,6 @@ struct ErrorResponse {
#[derive(Debug, Deserialize)]
struct Error {
r#type: Option<String>,
#[allow(dead_code)]
code: Option<String>,
message: Option<String>,
@@ -794,9 +793,13 @@ async fn process_sse<S>(
if let Some(error) = error {
match serde_json::from_value::<Error>(error.clone()) {
Ok(error) => {
let delay = try_parse_retry_after(&error);
let message = error.message.unwrap_or_default();
response_error = Some(CodexErr::Stream(message, delay));
if is_context_window_error(&error) {
response_error = Some(CodexErr::ContextWindowExceeded);
} else {
let delay = try_parse_retry_after(&error);
let message = error.message.clone().unwrap_or_default();
response_error = Some(CodexErr::Stream(message, delay));
}
}
Err(e) => {
let error = format!("failed to parse ErrorResponse: {e}");
@@ -922,6 +925,10 @@ fn try_parse_retry_after(err: &Error) -> Option<Duration> {
None
}
fn is_context_window_error(error: &Error) -> bool {
error.code.as_deref() == Some("context_length_exceeded")
}
#[cfg(test)]
mod tests {
use super::*;
@@ -1179,6 +1186,74 @@ mod tests {
}
}
#[tokio::test]
async fn context_window_error_is_fatal() {
let raw_error = r#"{"type":"response.failed","sequence_number":3,"response":{"id":"resp_5c66275b97b9baef1ed95550adb3b7ec13b17aafd1d2f11b","object":"response","created_at":1759510079,"status":"failed","background":false,"error":{"code":"context_length_exceeded","message":"Your input exceeds the context window of this model. Please adjust your input and try again."},"usage":null,"user":null,"metadata":{}}}"#;
let sse1 = format!("event: response.failed\ndata: {raw_error}\n\n");
let provider = ModelProviderInfo {
name: "test".to_string(),
base_url: Some("https://test.com".to_string()),
env_key: Some("TEST_API_KEY".to_string()),
env_key_instructions: None,
wire_api: WireApi::Responses,
query_params: None,
http_headers: None,
env_http_headers: None,
request_max_retries: Some(0),
stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(1000),
requires_openai_auth: false,
};
let otel_event_manager = otel_event_manager();
let events = collect_events(&[sse1.as_bytes()], provider, otel_event_manager).await;
assert_eq!(events.len(), 1);
match &events[0] {
Err(err @ CodexErr::ContextWindowExceeded) => {
assert_eq!(err.to_string(), CodexErr::ContextWindowExceeded.to_string());
}
other => panic!("unexpected context window event: {other:?}"),
}
}
#[tokio::test]
async fn context_window_error_with_newline_is_fatal() {
let raw_error = r#"{"type":"response.failed","sequence_number":4,"response":{"id":"resp_fatal_newline","object":"response","created_at":1759510080,"status":"failed","background":false,"error":{"code":"context_length_exceeded","message":"Your input exceeds the context window of this model. Please adjust your input and try\nagain."},"usage":null,"user":null,"metadata":{}}}"#;
let sse1 = format!("event: response.failed\ndata: {raw_error}\n\n");
let provider = ModelProviderInfo {
name: "test".to_string(),
base_url: Some("https://test.com".to_string()),
env_key: Some("TEST_API_KEY".to_string()),
env_key_instructions: None,
wire_api: WireApi::Responses,
query_params: None,
http_headers: None,
env_http_headers: None,
request_max_retries: Some(0),
stream_max_retries: Some(0),
stream_idle_timeout_ms: Some(1000),
requires_openai_auth: false,
};
let otel_event_manager = otel_event_manager();
let events = collect_events(&[sse1.as_bytes()], provider, otel_event_manager).await;
assert_eq!(events.len(), 1);
match &events[0] {
Err(err @ CodexErr::ContextWindowExceeded) => {
assert_eq!(err.to_string(), CodexErr::ContextWindowExceeded.to_string());
}
other => panic!("unexpected context window event: {other:?}"),
}
}
// ────────────────────────────
// Table-driven test from `main`
// ────────────────────────────

View File

@@ -782,6 +782,17 @@ impl Session {
self.send_event(event).await;
}
async fn set_total_tokens_full(&self, sub_id: &str, turn_context: &TurnContext) {
let context_window = turn_context.client.get_model_context_window();
if let Some(context_window) = context_window {
{
let mut state = self.state.lock().await;
state.set_token_usage_full(context_window);
}
self.send_token_count_event(sub_id).await;
}
}
/// Record a user input item to conversation history and also persist a
/// corresponding UserMessage EventMsg to rollout.
async fn record_input_and_rollout_usermsg(&self, response_input: &ResponseInputItem) {
@@ -1938,6 +1949,10 @@ async fn run_turn(
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
Err(e @ CodexErr::Fatal(_)) => return Err(e),
Err(e @ CodexErr::ContextWindowExceeded) => {
sess.set_total_tokens_full(&sub_id, turn_context).await;
return Err(e);
}
Err(CodexErr::UsageLimitReached(e)) => {
let rate_limits = e.rate_limits.clone();
if let Some(rate_limits) = rate_limits {

View File

@@ -103,6 +103,18 @@ async fn run_compact_task_inner(
Err(CodexErr::Interrupted) => {
return;
}
Err(e @ CodexErr::ContextWindowExceeded) => {
sess.set_total_tokens_full(&sub_id, turn_context.as_ref())
.await;
let event = Event {
id: sub_id.clone(),
msg: EventMsg::Error(ErrorEvent {
message: e.to_string(),
}),
};
sess.send_event(event).await;
return;
}
Err(e) => {
if retries < max_retries {
retries += 1;

View File

@@ -55,6 +55,11 @@ pub enum CodexErr {
#[error("stream disconnected before completion: {0}")]
Stream(String, Option<Duration>),
#[error(
"Codex ran out of room in the model's context window. Start a new conversation or clear earlier history before retrying."
)]
ContextWindowExceeded,
#[error("no conversation with id: {0}")]
ConversationNotFound(ConversationId),

View File

@@ -64,5 +64,14 @@ impl SessionState {
(self.token_info.clone(), self.latest_rate_limits.clone())
}
pub(crate) fn set_token_usage_full(&mut self, context_window: u64) {
match &mut self.token_info {
Some(info) => info.fill_to_context_window(context_window),
None => {
self.token_info = Some(TokenUsageInfo::full_context_window(context_window));
}
}
}
// Pending input/approval moved to TurnState.
}