diff --git a/AGENTS.md b/AGENTS.md
index 81b8d10a..de6ef8b9 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -11,6 +11,7 @@ In the codex-rs folder where the rust code lives:
 - Always collapse if statements per https://rust-lang.github.io/rust-clippy/master/index.html#collapsible_if
 - Always inline format! args when possible per https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args
 - Use method references over closures when possible per https://rust-lang.github.io/rust-clippy/master/index.html#redundant_closure_for_method_calls
+- Do not use unsigned integers even if the number cannot be negative.
 - When writing tests, prefer comparing the equality of entire objects over fields one by one.
 - When making a change that adds or changes an API, ensure that the documentation in the `docs/` folder is up to date if applicable.
diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs
index b215106d..6865e67f 100644
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -112,10 +112,12 @@ impl ModelClient {
         }
     }
 
-    pub fn get_model_context_window(&self) -> Option<u64> {
+    pub fn get_model_context_window(&self) -> Option<i64> {
+        let pct = self.config.model_family.effective_context_window_percent;
         self.config
             .model_context_window
             .or_else(|| get_model_info(&self.config.model_family).map(|info| info.context_window))
+            .map(|w| w.saturating_mul(pct) / 100)
     }
 
     pub fn get_auto_compact_token_limit(&self) -> Option<i64> {
@@ -544,11 +546,11 @@ struct ResponseCompleted {
 
 #[derive(Debug, Deserialize)]
 struct ResponseCompletedUsage {
-    input_tokens: u64,
+    input_tokens: i64,
     input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
-    output_tokens: u64,
+    output_tokens: i64,
     output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
-    total_tokens: u64,
+    total_tokens: i64,
 }
 
 impl From<ResponseCompletedUsage> for TokenUsage {
@@ -571,12 +573,12 @@ impl From<ResponseCompletedUsage> for TokenUsage {
 
 #[derive(Debug, Deserialize)]
 struct ResponseCompletedInputTokensDetails {
-    cached_tokens: u64,
+    cached_tokens: i64,
 }
 
 #[derive(Debug, Deserialize)]
 struct ResponseCompletedOutputTokensDetails {
-    reasoning_tokens: u64,
+    reasoning_tokens: i64,
 }
 
 fn attach_item_ids(payload_json: &mut Value, original_items: &[ResponseItem]) {
@@ -633,7 +635,7 @@ fn parse_rate_limit_window(
     let used_percent: Option<f64> = parse_header_f64(headers, used_percent_header);
 
     used_percent.and_then(|used_percent| {
-        let window_minutes = parse_header_u64(headers, window_minutes_header);
+        let window_minutes = parse_header_i64(headers, window_minutes_header);
         let resets_at = parse_header_str(headers, resets_header)
             .map(str::trim)
             .filter(|value| !value.is_empty())
@@ -658,8 +660,8 @@ fn parse_header_f64(headers: &HeaderMap, name: &str) -> Option<f64> {
         .filter(|v| v.is_finite())
 }
 
-fn parse_header_u64(headers: &HeaderMap, name: &str) -> Option<u64> {
-    parse_header_str(headers, name)?.parse::<u64>().ok()
+fn parse_header_i64(headers: &HeaderMap, name: &str) -> Option<i64> {
+    parse_header_str(headers, name)?.parse::<i64>().ok()
 }
 
 fn parse_header_str<'a>(headers: &'a HeaderMap, name: &str) -> Option<&'a str> {
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 2ce09970..b7554f0e 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1778,7 +1778,7 @@ pub(crate) async fn run_task(
         .as_ref()
         .map(TokenUsage::tokens_in_context_window);
     let token_limit_reached = total_usage_tokens
-        .map(|tokens| (tokens as i64) >= limit)
+        .map(|tokens| tokens >= limit)
         .unwrap_or(false);
     let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
     let mut responses = Vec::<ProcessedResponseItem>::new();
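A minimal sketch of the effective-window scaling that `get_model_context_window` now applies (the helper name below is illustrative; only the arithmetic comes from the diff). With the default `effective_context_window_percent` of 95, the 272,000-token gpt-5-codex window is reported as 258,400 tokens, which is the value the updated test expectations use.

```rust
// Sketch, not crate code: the 95% effective-context-window scaling.
fn effective_context_window(raw_window: i64, percent: i64) -> i64 {
    // Mirrors `.map(|w| w.saturating_mul(pct) / 100)` from the diff.
    raw_window.saturating_mul(percent) / 100
}

fn main() {
    // 272_000 * 95 / 100 = 258_400
    assert_eq!(effective_context_window(272_000, 95), 258_400);
}
```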
diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs
index 0283e360..93d0ad4b 100644
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -85,10 +85,10 @@ pub struct Config {
     pub model_family: ModelFamily,
 
     /// Size of the context window for the model, in tokens.
-    pub model_context_window: Option<u64>,
+    pub model_context_window: Option<i64>,
 
     /// Maximum number of output tokens.
-    pub model_max_output_tokens: Option<u64>,
+    pub model_max_output_tokens: Option<i64>,
 
     /// Token usage threshold triggering auto-compaction of conversation history.
     pub model_auto_compact_token_limit: Option<i64>,
@@ -824,10 +824,10 @@
     pub model_provider: Option,
 
     /// Size of the context window for the model, in tokens.
-    pub model_context_window: Option<u64>,
+    pub model_context_window: Option<i64>,
 
     /// Maximum number of output tokens.
-    pub model_max_output_tokens: Option<u64>,
+    pub model_max_output_tokens: Option<i64>,
 
     /// Token usage threshold triggering auto-compaction of conversation history.
     pub model_auto_compact_token_limit: Option<i64>,
@@ -2805,7 +2805,7 @@ model_verbosity = "high"
             model_family: find_family_for_model("o3").expect("known model slug"),
             model_context_window: Some(200_000),
             model_max_output_tokens: Some(100_000),
-            model_auto_compact_token_limit: None,
+            model_auto_compact_token_limit: Some(180_000),
             model_provider_id: "openai".to_string(),
             model_provider: fixture.openai_provider.clone(),
             approval_policy: AskForApproval::Never,
@@ -2874,7 +2874,7 @@ model_verbosity = "high"
             model_family: find_family_for_model("gpt-3.5-turbo").expect("known model slug"),
             model_context_window: Some(16_385),
             model_max_output_tokens: Some(4_096),
-            model_auto_compact_token_limit: None,
+            model_auto_compact_token_limit: Some(14_746),
             model_provider_id: "openai-chat-completions".to_string(),
             model_provider: fixture.openai_chat_completions_provider.clone(),
             approval_policy: AskForApproval::UnlessTrusted,
@@ -2958,7 +2958,7 @@ model_verbosity = "high"
             model_family: find_family_for_model("o3").expect("known model slug"),
             model_context_window: Some(200_000),
             model_max_output_tokens: Some(100_000),
-            model_auto_compact_token_limit: None,
+            model_auto_compact_token_limit: Some(180_000),
             model_provider_id: "openai".to_string(),
             model_provider: fixture.openai_provider.clone(),
             approval_policy: AskForApproval::OnFailure,
@@ -3028,7 +3028,7 @@ model_verbosity = "high"
             model_family: find_family_for_model("gpt-5").expect("known model slug"),
             model_context_window: Some(272_000),
             model_max_output_tokens: Some(128_000),
-            model_auto_compact_token_limit: None,
+            model_auto_compact_token_limit: Some(244_800),
             model_provider_id: "openai".to_string(),
             model_provider: fixture.openai_provider.clone(),
             approval_policy: AskForApproval::OnFailure,
diff --git a/codex-rs/core/src/model_family.rs b/codex-rs/core/src/model_family.rs
index 80b3f279..623cc26e 100644
--- a/codex-rs/core/src/model_family.rs
+++ b/codex-rs/core/src/model_family.rs
@@ -48,6 +48,12 @@ pub struct ModelFamily {
 
     /// Names of beta tools that should be exposed to this model family.
     pub experimental_supported_tools: Vec<String>,
+
+    /// Percentage of the context window considered usable for inputs, after
+    /// reserving headroom for system prompts, tool overhead, and model output.
+    /// This is applied when computing the effective context window seen by
+    /// consumers.
+    pub effective_context_window_percent: i64,
 }
 
 macro_rules! model_family {
@@ -66,6 +72,7 @@ macro_rules! model_family {
             apply_patch_tool_type: None,
             base_instructions: BASE_INSTRUCTIONS.to_string(),
             experimental_supported_tools: Vec::new(),
+            effective_context_window_percent: 95,
         };
         // apply overrides
         $(
@@ -175,5 +182,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
         apply_patch_tool_type: None,
         base_instructions: BASE_INSTRUCTIONS.to_string(),
         experimental_supported_tools: Vec::new(),
+        effective_context_window_percent: 95,
     }
 }
diff --git a/codex-rs/core/src/openai_model_info.rs b/codex-rs/core/src/openai_model_info.rs
index d1d2305a..e0d0bfd1 100644
--- a/codex-rs/core/src/openai_model_info.rs
+++ b/codex-rs/core/src/openai_model_info.rs
@@ -1,5 +1,9 @@
 use crate::model_family::ModelFamily;
 
+// Shared constants for commonly used window/token sizes.
+pub(crate) const CONTEXT_WINDOW_272K: i64 = 272_000;
+pub(crate) const MAX_OUTPUT_TOKENS_128K: i64 = 128_000;
+
 /// Metadata about a model, particularly OpenAI models.
 /// We may want to consider including details like the pricing for
 /// input tokens, output tokens, etc., though users will need to be able to
@@ -8,10 +12,10 @@ use crate::model_family::ModelFamily;
 #[derive(Debug)]
 pub(crate) struct ModelInfo {
     /// Size of the context window in tokens. This is the maximum size of the input context.
-    pub(crate) context_window: u64,
+    pub(crate) context_window: i64,
 
     /// Maximum number of output tokens that can be generated for the model.
-    pub(crate) max_output_tokens: u64,
+    pub(crate) max_output_tokens: i64,
 
     /// Token threshold where we should automatically compact conversation history. This considers
     /// input tokens + output tokens of this turn.
@@ -19,13 +23,17 @@
 }
 
 impl ModelInfo {
-    const fn new(context_window: u64, max_output_tokens: u64) -> Self {
+    const fn new(context_window: i64, max_output_tokens: i64) -> Self {
         Self {
             context_window,
             max_output_tokens,
-            auto_compact_token_limit: None,
+            auto_compact_token_limit: Some(Self::default_auto_compact_limit(context_window)),
         }
     }
+
+    const fn default_auto_compact_limit(context_window: i64) -> i64 {
+        (context_window * 9) / 10
+    }
 }
 
 pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
@@ -62,15 +70,17 @@ pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
         // https://platform.openai.com/docs/models/gpt-3.5-turbo
         "gpt-3.5-turbo" => Some(ModelInfo::new(16_385, 4_096)),
 
-        _ if slug.starts_with("gpt-5-codex") => Some(ModelInfo {
-            context_window: 272_000,
-            max_output_tokens: 128_000,
-            auto_compact_token_limit: Some(350_000),
-        }),
+        _ if slug.starts_with("gpt-5-codex") => {
+            Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
+        }
 
-        _ if slug.starts_with("gpt-5") => Some(ModelInfo::new(272_000, 128_000)),
+        _ if slug.starts_with("gpt-5") => {
+            Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
+        }
 
-        _ if slug.starts_with("codex-") => Some(ModelInfo::new(272_000, 128_000)),
+        _ if slug.starts_with("codex-") => {
+            Some(ModelInfo::new(CONTEXT_WINDOW_272K, MAX_OUTPUT_TOKENS_128K))
+        }
 
         _ => None,
     }
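The hard-coded `auto_compact_token_limit: Some(350_000)` is replaced by a default derived as 90% of the context window via integer division. A standalone sketch (not crate code) reproduces the fixture values the config.rs tests now expect.

```rust
// Sketch of the 90% auto-compact default added in ModelInfo::new;
// only the arithmetic mirrors the diff.
const fn default_auto_compact_limit(context_window: i64) -> i64 {
    (context_window * 9) / 10
}

fn main() {
    assert_eq!(default_auto_compact_limit(200_000), 180_000); // o3 fixtures
    assert_eq!(default_auto_compact_limit(16_385), 14_746); // gpt-3.5-turbo fixture
    assert_eq!(default_auto_compact_limit(272_000), 244_800); // gpt-5 fixture
}
```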
diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs
index fa7543ff..eaa3bcb9 100644
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -48,7 +48,7 @@ impl SessionState {
     pub(crate) fn update_token_info_from_usage(
         &mut self,
         usage: &TokenUsage,
-        model_context_window: Option<u64>,
+        model_context_window: Option<i64>,
     ) {
         self.token_info = TokenUsageInfo::new_or_append(
             &self.token_info,
@@ -67,7 +67,7 @@
         (self.token_info.clone(), self.latest_rate_limits.clone())
     }
 
-    pub(crate) fn set_token_usage_full(&mut self, context_window: u64) {
+    pub(crate) fn set_token_usage_full(&mut self, context_window: i64) {
         match &mut self.token_info {
             Some(info) => info.fill_to_context_window(context_window),
             None => {
diff --git a/codex-rs/core/tests/common/responses.rs b/codex-rs/core/tests/common/responses.rs
index 98b3eca1..a8a777ae 100644
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -138,7 +138,7 @@ pub fn ev_response_created(id: &str) -> Value {
     })
 }
 
-pub fn ev_completed_with_tokens(id: &str, total_tokens: u64) -> Value {
+pub fn ev_completed_with_tokens(id: &str, total_tokens: i64) -> Value {
     serde_json::json!({
         "type": "response.completed",
         "response": {
diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs
index 817f2c2a..d84d8c03 100644
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -858,8 +858,8 @@ async fn token_count_includes_rate_limits_snapshot() {
                 "reasoning_output_tokens": 0,
                 "total_tokens": 123
             },
-            // Default model is gpt-5-codex in tests → 272000 context window
-            "model_context_window": 272000
+            // Default model is gpt-5-codex in tests → 95% usable context window
+            "model_context_window": 258400
         },
         "rate_limits": {
             "primary": {
@@ -985,6 +985,8 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
     skip_if_no_network!(Ok(()));
 
     let server = MockServer::start().await;
+    const EFFECTIVE_CONTEXT_WINDOW: i64 = (272_000 * 95) / 100;
+
     responses::mount_sse_once_match(
         &server,
         body_string_contains("trigger context window"),
@@ -1056,8 +1058,11 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
         .info
         .expect("token usage info present when context window is exceeded");
 
-    assert_eq!(info.model_context_window, Some(272_000));
-    assert_eq!(info.total_token_usage.total_tokens, 272_000);
+    assert_eq!(info.model_context_window, Some(EFFECTIVE_CONTEXT_WINDOW));
+    assert_eq!(
+        info.total_token_usage.total_tokens,
+        EFFECTIVE_CONTEXT_WINDOW
+    );
 
     let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
     let expected_context_window_message = CodexErr::ContextWindowExceeded.to_string();
diff --git a/codex-rs/core/tests/suite/compact.rs b/codex-rs/core/tests/suite/compact.rs
index 82750e85..bd760b87 100644
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -19,6 +19,7 @@ use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
 use core_test_support::responses::ev_completed_with_tokens;
 use core_test_support::responses::ev_function_call;
+use core_test_support::responses::mount_sse_once;
 use core_test_support::responses::mount_sse_once_match;
 use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::sse;
@@ -43,6 +44,7 @@ const CONTEXT_LIMIT_MESSAGE: &str =
     "Your input exceeds the context window of this model. Please adjust your input and try again.";
Please adjust your input and try again."; const DUMMY_FUNCTION_NAME: &str = "unsupported_tool"; const DUMMY_CALL_ID: &str = "call-multi-auto"; +const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push"; #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn summarize_context_three_requests_and_instructions() { @@ -860,3 +862,97 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_ "second auto compact request should include the summarization prompt" ); } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn auto_compact_triggers_after_function_call_over_95_percent_usage() { + skip_if_no_network!(); + + let server = start_mock_server().await; + + let context_window = 100; + let limit = context_window * 90 / 100; + let over_limit_tokens = context_window * 95 / 100 + 1; + + let first_turn = sse(vec![ + ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"), + ev_completed_with_tokens("r1", 50), + ]); + let function_call_follow_up = sse(vec![ + ev_assistant_message("m2", FINAL_REPLY), + ev_completed_with_tokens("r2", over_limit_tokens), + ]); + let auto_compact_turn = sse(vec![ + ev_assistant_message("m3", AUTO_SUMMARY_TEXT), + ev_completed_with_tokens("r3", 10), + ]); + let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 10)]); + + // Mount responses in order and keep mocks only for the ones we assert on. + let first_turn_mock = mount_sse_once(&server, first_turn).await; + let follow_up_mock = mount_sse_once(&server, function_call_follow_up).await; + let auto_compact_mock = mount_sse_once(&server, auto_compact_turn).await; + // We don't assert on the post-compact request, so no need to keep its mock. + mount_sse_once(&server, post_auto_compact_turn).await; + + let model_provider = ModelProviderInfo { + base_url: Some(format!("{}/v1", server.uri())), + ..built_in_model_providers()["openai"].clone() + }; + + let home = TempDir::new().unwrap(); + let mut config = load_default_config_for_test(&home); + config.model_provider = model_provider; + config.model_context_window = Some(context_window); + config.model_auto_compact_token_limit = Some(limit); + + let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy")) + .new_conversation(config) + .await + .unwrap() + .conversation; + + codex + .submit(Op::UserInput { + items: vec![InputItem::Text { + text: FUNCTION_CALL_LIMIT_MSG.into(), + }], + }) + .await + .unwrap(); + + wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await; + + // Assert first request captured expected user message that triggers function call. 
+    let first_request = first_turn_mock.single_request().input();
+    assert!(
+        first_request.iter().any(|item| {
+            item.get("type").and_then(|value| value.as_str()) == Some("message")
+                && item
+                    .get("content")
+                    .and_then(|content| content.as_array())
+                    .and_then(|entries| entries.first())
+                    .and_then(|entry| entry.get("text"))
+                    .and_then(|value| value.as_str())
+                    == Some(FUNCTION_CALL_LIMIT_MSG)
+        }),
+        "first request should include the user message that triggers the function call"
+    );
+
+    let function_call_output = follow_up_mock
+        .single_request()
+        .function_call_output(DUMMY_CALL_ID);
+    let output_text = function_call_output
+        .get("output")
+        .and_then(|value| value.as_str())
+        .unwrap_or_default();
+    assert!(
+        output_text.contains(DUMMY_FUNCTION_NAME),
+        "function call output should be sent before auto compact"
+    );
+
+    let auto_compact_body = auto_compact_mock.single_request().body_json().to_string();
+    assert!(
+        auto_compact_body.contains("You have exceeded the maximum number of tokens"),
+        "auto compact request should include the summarization prompt after exceeding 95% (limit {limit})"
+    );
+}
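The new test drives the thresholds with a tiny 100-token window. A standalone sketch of the same integer arithmetic (illustrative only, not test code) shows why the follow-up turn, which reports 95% + 1 tokens of usage, crosses the 90% auto-compact limit and forces the summarization turn.

```rust
// Illustrative check of the thresholds the new compact.rs test wires up.
fn main() {
    let context_window: i64 = 100;
    let limit = context_window * 90 / 100; // auto-compact threshold
    let over_limit_tokens = context_window * 95 / 100 + 1; // usage reported on the follow-up turn

    assert_eq!(limit, 90);
    assert_eq!(over_limit_tokens, 96);
    // 96 >= 90, so the next request should be the auto-compact summarization turn.
    assert!(over_limit_tokens >= limit);
}
```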
diff --git a/codex-rs/exec/src/exec_events.rs b/codex-rs/exec/src/exec_events.rs
index 606cf918..032b912f 100644
--- a/codex-rs/exec/src/exec_events.rs
+++ b/codex-rs/exec/src/exec_events.rs
@@ -57,11 +57,11 @@ pub struct TurnFailedEvent {
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)]
 pub struct Usage {
     /// The number of input tokens used during the turn.
-    pub input_tokens: u64,
+    pub input_tokens: i64,
     /// The number of cached input tokens used during the turn.
-    pub cached_input_tokens: u64,
+    pub cached_input_tokens: i64,
     /// The number of output tokens used during the turn.
-    pub output_tokens: u64,
+    pub output_tokens: i64,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
diff --git a/codex-rs/otel/src/otel_event_manager.rs b/codex-rs/otel/src/otel_event_manager.rs
index c02ea19f..666d83f8 100644
--- a/codex-rs/otel/src/otel_event_manager.rs
+++ b/codex-rs/otel/src/otel_event_manager.rs
@@ -86,8 +86,8 @@ impl OtelEventManager {
         provider_name: &str,
         reasoning_effort: Option,
         reasoning_summary: ReasoningSummary,
-        context_window: Option<u64>,
-        max_output_tokens: Option<u64>,
+        context_window: Option<i64>,
+        max_output_tokens: Option<i64>,
         auto_compact_token_limit: Option<i64>,
         approval_policy: AskForApproval,
         sandbox_policy: SandboxPolicy,
@@ -281,11 +281,11 @@ impl OtelEventManager {
 
     pub fn sse_event_completed(
         &self,
-        input_token_count: u64,
-        output_token_count: u64,
-        cached_token_count: Option<u64>,
-        reasoning_token_count: Option<u64>,
-        tool_token_count: u64,
+        input_token_count: i64,
+        output_token_count: i64,
+        cached_token_count: Option<i64>,
+        reasoning_token_count: Option<i64>,
+        tool_token_count: i64,
     ) {
         tracing::event!(
             tracing::Level::INFO,
diff --git a/codex-rs/protocol/src/num_format.rs b/codex-rs/protocol/src/num_format.rs
index 72a4ee72..2c64939b 100644
--- a/codex-rs/protocol/src/num_format.rs
+++ b/codex-rs/protocol/src/num_format.rs
@@ -22,27 +22,28 @@ fn formatter() -> &'static DecimalFormatter {
     FORMATTER.get_or_init(|| make_local_formatter().unwrap_or_else(make_en_us_formatter))
 }
 
-/// Format a u64 with locale-aware digit separators (e.g. "12345" -> "12,345"
+/// Format an i64 with locale-aware digit separators (e.g. "12345" -> "12,345"
 /// for en-US).
-pub fn format_with_separators(n: u64) -> String {
+pub fn format_with_separators(n: i64) -> String {
     formatter().format(&Decimal::from(n)).to_string()
 }
 
-fn format_si_suffix_with_formatter(n: u64, formatter: &DecimalFormatter) -> String {
+fn format_si_suffix_with_formatter(n: i64, formatter: &DecimalFormatter) -> String {
+    let n = n.max(0);
     if n < 1000 {
         return formatter.format(&Decimal::from(n)).to_string();
     }
 
     // Format `n / scale` with the requested number of fractional digits.
-    let format_scaled = |n: u64, scale: u64, frac_digits: u32| -> String {
+    let format_scaled = |n: i64, scale: i64, frac_digits: u32| -> String {
         let value = n as f64 / scale as f64;
-        let scaled: u64 = (value * 10f64.powi(frac_digits as i32)).round() as u64;
+        let scaled: i64 = (value * 10f64.powi(frac_digits as i32)).round() as i64;
         let mut dec = Decimal::from(scaled);
         dec.multiply_pow10(-(frac_digits as i16));
         formatter.format(&dec).to_string()
     };
 
-    const UNITS: [(u64, &str); 3] = [(1_000, "K"), (1_000_000, "M"), (1_000_000_000, "G")];
+    const UNITS: [(i64, &str); 3] = [(1_000, "K"), (1_000_000, "M"), (1_000_000_000, "G")];
     let f = n as f64;
     for &(scale, suffix) in &UNITS {
         if (100.0 * f / scale as f64).round() < 1000.0 {
@@ -57,7 +58,7 @@ fn format_si_suffix_with_formatter(n: u64, formatter: &DecimalFormatter) -> Stri
     // Above 1000G, keep whole‑G precision.
     format!(
         "{}G",
-        format_with_separators(((n as f64) / 1e9).round() as u64)
+        format_with_separators(((n as f64) / 1e9).round() as i64)
     )
 }
 
@@ -67,7 +68,7 @@ fn format_si_suffix_with_formatter(n: u64, formatter: &DecimalFormatter) -> Stri
 /// - 999 -> "999"
 /// - 1200 -> "1.20K"
 /// - 123456789 -> "123M"
-pub fn format_si_suffix(n: u64) -> String {
+pub fn format_si_suffix(n: i64) -> String {
     format_si_suffix_with_formatter(n, formatter())
 }
 
@@ -78,7 +79,7 @@ mod tests {
 
     #[test]
     fn kmg() {
         let formatter = make_en_us_formatter();
-        let fmt = |n: u64| format_si_suffix_with_formatter(n, &formatter);
+        let fmt = |n: i64| format_si_suffix_with_formatter(n, &formatter);
         assert_eq!(fmt(0), "0");
         assert_eq!(fmt(999), "999");
         assert_eq!(fmt(1_000), "1.00K");
diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs
index 5429e4d4..495768d9 100644
--- a/codex-rs/protocol/src/protocol.rs
+++ b/codex-rs/protocol/src/protocol.rs
@@ -545,21 +545,21 @@ pub struct TaskCompleteEvent {
 
 #[derive(Debug, Clone, Deserialize, Serialize, TS)]
 pub struct TaskStartedEvent {
-    pub model_context_window: Option<u64>,
+    pub model_context_window: Option<i64>,
 }
 
 #[derive(Debug, Clone, Deserialize, Serialize, Default, TS)]
 pub struct TokenUsage {
     #[ts(type = "number")]
-    pub input_tokens: u64,
+    pub input_tokens: i64,
     #[ts(type = "number")]
-    pub cached_input_tokens: u64,
+    pub cached_input_tokens: i64,
     #[ts(type = "number")]
-    pub output_tokens: u64,
+    pub output_tokens: i64,
     #[ts(type = "number")]
-    pub reasoning_output_tokens: u64,
+    pub reasoning_output_tokens: i64,
     #[ts(type = "number")]
-    pub total_tokens: u64,
+    pub total_tokens: i64,
 }
 
 #[derive(Debug, Clone, Deserialize, Serialize, TS)]
@@ -567,14 +567,14 @@ pub struct TokenUsageInfo {
     pub total_token_usage: TokenUsage,
     pub last_token_usage: TokenUsage,
     #[ts(type = "number | null")]
-    pub model_context_window: Option<u64>,
+    pub model_context_window: Option<i64>,
 }
 
 impl TokenUsageInfo {
     pub fn new_or_append(
         info: &Option<TokenUsageInfo>,
         last: &Option<TokenUsage>,
-        model_context_window: Option<u64>,
+        model_context_window: Option<i64>,
     ) -> Option<TokenUsageInfo> {
         if info.is_none() && last.is_none() {
             return None;
         }
@@ -599,9 +599,9 @@
         self.last_token_usage = last.clone();
     }
 
-    pub fn fill_to_context_window(&mut self, context_window: u64) {
+    pub fn fill_to_context_window(&mut self, context_window: i64) {
         let previous_total = self.total_token_usage.total_tokens;
-        let delta = context_window.saturating_sub(previous_total);
+        let delta = (context_window - previous_total).max(0);
 
         self.model_context_window = Some(context_window);
         self.total_token_usage = TokenUsage {
@@ -614,7 +614,7 @@ impl TokenUsageInfo {
         };
     }
 
-    pub fn full_context_window(context_window: u64) -> Self {
+    pub fn full_context_window(context_window: i64) -> Self {
         let mut info = Self {
             total_token_usage: TokenUsage::default(),
             last_token_usage: TokenUsage::default(),
@@ -643,40 +643,39 @@ pub struct RateLimitWindow {
     pub used_percent: f64,
 
     /// Rolling window duration, in minutes.
     #[ts(type = "number | null")]
-    pub window_minutes: Option<u64>,
+    pub window_minutes: Option<i64>,
 
     /// Timestamp (RFC3339) when the window resets.
     #[ts(type = "string | null")]
     pub resets_at: Option<String>,
 }
 
 // Includes prompts, tools and space to call compact.
-const BASELINE_TOKENS: u64 = 12000;
+const BASELINE_TOKENS: i64 = 12000;
 
 impl TokenUsage {
     pub fn is_zero(&self) -> bool {
         self.total_tokens == 0
     }
 
-    pub fn cached_input(&self) -> u64 {
-        self.cached_input_tokens
+    pub fn cached_input(&self) -> i64 {
+        self.cached_input_tokens.max(0)
     }
 
-    pub fn non_cached_input(&self) -> u64 {
-        self.input_tokens.saturating_sub(self.cached_input())
+    pub fn non_cached_input(&self) -> i64 {
+        (self.input_tokens - self.cached_input()).max(0)
     }
 
     /// Primary count for display as a single absolute value: non-cached input + output.
-    pub fn blended_total(&self) -> u64 {
-        self.non_cached_input() + self.output_tokens
+    pub fn blended_total(&self) -> i64 {
+        (self.non_cached_input() + self.output_tokens.max(0)).max(0)
     }
 
     /// For estimating what % of the model's context window is used, we need to account
     /// for reasoning output tokens from prior turns being dropped from the context window.
     /// We approximate this here by subtracting reasoning output tokens from the total.
     /// This will be off for the current turn and pending function calls.
-    pub fn tokens_in_context_window(&self) -> u64 {
-        self.total_tokens
-            .saturating_sub(self.reasoning_output_tokens)
+    pub fn tokens_in_context_window(&self) -> i64 {
+        (self.total_tokens - self.reasoning_output_tokens).max(0)
     }
 
     /// Estimate the remaining user-controllable percentage of the model's context window.
@@ -689,17 +688,17 @@ impl TokenUsage {
     /// This normalizes both the numerator and denominator by subtracting the
     /// baseline, so immediately after the first prompt the UI shows 100% left
     /// and trends toward 0% as the user fills the effective window.
-    pub fn percent_of_context_window_remaining(&self, context_window: u64) -> u8 {
+    pub fn percent_of_context_window_remaining(&self, context_window: i64) -> i64 {
         if context_window <= BASELINE_TOKENS {
             return 0;
         }
 
         let effective_window = context_window - BASELINE_TOKENS;
-        let used = self
-            .tokens_in_context_window()
-            .saturating_sub(BASELINE_TOKENS);
-        let remaining = effective_window.saturating_sub(used);
-        ((remaining as f32 / effective_window as f32) * 100.0).clamp(0.0, 100.0) as u8
+        let used = (self.tokens_in_context_window() - BASELINE_TOKENS).max(0);
+        let remaining = (effective_window - used).max(0);
+        ((remaining as f64 / effective_window as f64) * 100.0)
+            .clamp(0.0, 100.0)
+            .round() as i64
     }
 
     /// In-place element-wise sum of token counts.
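With the switch to `i64`, the remaining-context estimate clamps intermediate values with `.max(0)` instead of relying on unsigned `saturating_sub`, and it now rounds rather than truncates. A standalone sketch of the same math follows; the sample numbers are illustrative, not taken from the tests.

```rust
// Sketch of the baseline-normalized "context left" computation from the diff.
const BASELINE_TOKENS: i64 = 12_000;

fn percent_remaining(tokens_in_context: i64, context_window: i64) -> i64 {
    if context_window <= BASELINE_TOKENS {
        return 0;
    }
    let effective_window = context_window - BASELINE_TOKENS;
    let used = (tokens_in_context - BASELINE_TOKENS).max(0);
    let remaining = (effective_window - used).max(0);
    ((remaining as f64 / effective_window as f64) * 100.0)
        .clamp(0.0, 100.0)
        .round() as i64
}

fn main() {
    // 258_400-token effective window (95% of 272_000), 135_200 tokens in context:
    // effective = 246_400, used = 123_200, so half the usable window remains.
    assert_eq!(percent_remaining(135_200, 258_400), 50);
    // At or below the baseline, the UI reports the full window as remaining.
    assert_eq!(percent_remaining(12_000, 258_400), 100);
}
```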
diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs
index 56a6e44d..a7793205 100644
--- a/codex-rs/tui/src/bottom_pane/chat_composer.rs
+++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs
@@ -108,7 +108,7 @@ pub(crate) struct ChatComposer {
     custom_prompts: Vec,
     footer_mode: FooterMode,
     footer_hint_override: Option,
-    context_window_percent: Option<u8>,
+    context_window_percent: Option<i64>,
 }
 
 /// Popup state – at most one can be visible at any time.
@@ -1511,7 +1511,7 @@ impl ChatComposer {
         self.is_task_running = running;
     }
 
-    pub(crate) fn set_context_window_percent(&mut self, percent: Option<u8>) {
+    pub(crate) fn set_context_window_percent(&mut self, percent: Option<i64>) {
         if self.context_window_percent != percent {
             self.context_window_percent = percent;
         }
diff --git a/codex-rs/tui/src/bottom_pane/footer.rs b/codex-rs/tui/src/bottom_pane/footer.rs
index 6e92a0ce..79d7c60f 100644
--- a/codex-rs/tui/src/bottom_pane/footer.rs
+++ b/codex-rs/tui/src/bottom_pane/footer.rs
@@ -17,7 +17,7 @@ pub(crate) struct FooterProps {
     pub(crate) esc_backtrack_hint: bool,
     pub(crate) use_shift_enter_hint: bool,
     pub(crate) is_task_running: bool,
-    pub(crate) context_window_percent: Option<u8>,
+    pub(crate) context_window_percent: Option<i64>,
 }
 
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -221,8 +221,8 @@ fn build_columns(entries: Vec) -> Vec {
         .collect()
 }
 
-fn context_window_line(percent: Option<u8>) -> Line<'static> {
-    let percent = percent.unwrap_or(100);
+fn context_window_line(percent: Option<i64>) -> Line<'static> {
+    let percent = percent.unwrap_or(100).clamp(0, 100);
     Line::from(vec![Span::from(format!("{percent}% context left")).dim()])
 }
diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs
index 8d9e84f2..69405cd8 100644
--- a/codex-rs/tui/src/bottom_pane/mod.rs
+++ b/codex-rs/tui/src/bottom_pane/mod.rs
@@ -70,7 +70,7 @@ pub(crate) struct BottomPane {
     status: Option,
     /// Queued user messages to show under the status indicator.
     queued_user_messages: Vec,
-    context_window_percent: Option<u8>,
+    context_window_percent: Option<i64>,
 }
 
 pub(crate) struct BottomPaneParams {
@@ -357,7 +357,7 @@ impl BottomPane {
         }
     }
 
-    pub(crate) fn set_context_window_percent(&mut self, percent: Option<u8>) {
+    pub(crate) fn set_context_window_percent(&mut self, percent: Option<i64>) {
         if self.context_window_percent == percent {
             return;
         }
diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs
index 9f308cd0..8dc6454e 100644
--- a/codex-rs/tui/src/chatwidget.rs
+++ b/codex-rs/tui/src/chatwidget.rs
@@ -142,9 +142,9 @@ impl RateLimitWarningState {
     fn take_warnings(
         &mut self,
         secondary_used_percent: Option<f64>,
-        secondary_window_minutes: Option<u64>,
+        secondary_window_minutes: Option<i64>,
         primary_used_percent: Option<f64>,
-        primary_window_minutes: Option<u64>,
+        primary_window_minutes: Option<i64>,
     ) -> Vec {
         let reached_secondary_cap =
             matches!(secondary_used_percent, Some(percent) if percent == 100.0);
@@ -195,12 +195,14 @@ impl RateLimitWarningState {
     }
 }
 
-pub(crate) fn get_limits_duration(windows_minutes: u64) -> String {
-    const MINUTES_PER_HOUR: u64 = 60;
-    const MINUTES_PER_DAY: u64 = 24 * MINUTES_PER_HOUR;
-    const MINUTES_PER_WEEK: u64 = 7 * MINUTES_PER_DAY;
-    const MINUTES_PER_MONTH: u64 = 30 * MINUTES_PER_DAY;
-    const ROUNDING_BIAS_MINUTES: u64 = 3;
+pub(crate) fn get_limits_duration(windows_minutes: i64) -> String {
+    const MINUTES_PER_HOUR: i64 = 60;
+    const MINUTES_PER_DAY: i64 = 24 * MINUTES_PER_HOUR;
+    const MINUTES_PER_WEEK: i64 = 7 * MINUTES_PER_DAY;
+    const MINUTES_PER_MONTH: i64 = 30 * MINUTES_PER_DAY;
+    const ROUNDING_BIAS_MINUTES: i64 = 3;
+
+    let windows_minutes = windows_minutes.max(0);
 
     if windows_minutes <= MINUTES_PER_DAY.saturating_add(ROUNDING_BIAS_MINUTES) {
         let adjusted = windows_minutes.saturating_add(ROUNDING_BIAS_MINUTES);
diff --git a/codex-rs/tui/src/status/card.rs b/codex-rs/tui/src/status/card.rs
index 4832713b..a077a926 100644
--- a/codex-rs/tui/src/status/card.rs
+++ b/codex-rs/tui/src/status/card.rs
@@ -31,16 +31,16 @@ use super::rate_limits::render_status_limit_progress_bar;
 
 #[derive(Debug, Clone)]
 struct StatusContextWindowData {
-    percent_remaining: u8,
-    tokens_in_context: u64,
-    window: u64,
+    percent_remaining: i64,
+    tokens_in_context: i64,
+    window: i64,
 }
 
 #[derive(Debug, Clone)]
 pub(crate) struct StatusTokenUsageData {
-    total: u64,
-    input: u64,
-    output: u64,
+    total: i64,
+    input: i64,
+    output: i64,
     context_window: Option<StatusContextWindowData>,
 }
diff --git a/codex-rs/tui/src/status/helpers.rs b/codex-rs/tui/src/status/helpers.rs
index 1889d7f4..59362e2e 100644
--- a/codex-rs/tui/src/status/helpers.rs
+++ b/codex-rs/tui/src/status/helpers.rs
@@ -103,7 +103,8 @@ pub(crate) fn compose_account_display(config: &Config) -> Option
 }
 
-pub(crate) fn format_tokens_compact(value: u64) -> String {
+pub(crate) fn format_tokens_compact(value: i64) -> String {
+    let value = value.max(0);
     if value == 0 {
         return "0".to_string();
     }
@@ -111,14 +112,15 @@ pub(crate) fn format_tokens_compact(value: u64) -> String {
         return value.to_string();
     }
 
+    let value_f64 = value as f64;
     let (scaled, suffix) = if value >= 1_000_000_000_000 {
-        (value as f64 / 1_000_000_000_000.0, "T")
+        (value_f64 / 1_000_000_000_000.0, "T")
     } else if value >= 1_000_000_000 {
-        (value as f64 / 1_000_000_000.0, "B")
+        (value_f64 / 1_000_000_000.0, "B")
     } else if value >= 1_000_000 {
-        (value as f64 / 1_000_000.0, "M")
+        (value_f64 / 1_000_000.0, "M")
     } else {
-        (value as f64 / 1_000.0, "K")
+        (value_f64 / 1_000.0, "K")
     };
 
     let decimals = if scaled < 10.0 {
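Because the TUI now receives `i64` counts, values are defensively clamped at render time instead of relying on unsigned arithmetic. A small illustration of the pattern used in `format_tokens_compact` and in the footer (standalone sketch, not crate code):

```rust
// Illustrative only: the clamping pattern the TUI adopts for i64 counts.
fn clamp_token_count(value: i64) -> i64 {
    // Negative values should not occur, but are representable; render them as zero.
    value.max(0)
}

fn main() {
    assert_eq!(clamp_token_count(-42), 0);
    assert_eq!(clamp_token_count(1_500), 1_500);

    // The footer applies the same idea to percentages: unwrap_or(100).clamp(0, 100).
    let percent: Option<i64> = Some(137);
    assert_eq!(percent.unwrap_or(100).clamp(0, 100), 100);
}
```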
diff --git a/codex-rs/tui/src/status/rate_limits.rs b/codex-rs/tui/src/status/rate_limits.rs
index f6a9c06a..f0c0c973 100644
--- a/codex-rs/tui/src/status/rate_limits.rs
+++ b/codex-rs/tui/src/status/rate_limits.rs
@@ -27,7 +27,7 @@ pub(crate) enum StatusRateLimitData {
 pub(crate) struct RateLimitWindowDisplay {
     pub used_percent: f64,
     pub resets_at: Option,
-    pub window_minutes: Option<u64>,
+    pub window_minutes: Option<i64>,
 }
 
 impl RateLimitWindowDisplay {