From 28395df957dbbe2acd4944ef4c42501eb013f8d3 Mon Sep 17 00:00:00 2001
From: ae
Date: Thu, 7 Aug 2025 01:13:36 -0700
Subject: [PATCH] [fix] fix absolute and % token counts (#1931)

- For the absolute count, use non-cached input + output.
- For estimating what % of the model's context window is used, we need to
  account for reasoning output tokens from prior turns being dropped from
  the context window. We approximate this by subtracting reasoning output
  tokens from the total. This will be off for the current turn and pending
  function calls. We can improve it later.
---
 codex-rs/core/src/protocol.rs                 | 41 +++++++++++++++----
 .../src/event_processor_with_human_output.rs  |  5 +--
 codex-rs/tui/src/history_cell.rs              | 18 ++------
 3 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
index e61fc0c3..c789798b 100644
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -448,6 +448,28 @@ impl TokenUsage {
     pub fn is_zero(&self) -> bool {
         self.total_tokens == 0
     }
+
+    pub fn cached_input(&self) -> u64 {
+        self.cached_input_tokens.unwrap_or(0)
+    }
+
+    pub fn non_cached_input(&self) -> u64 {
+        self.input_tokens.saturating_sub(self.cached_input())
+    }
+
+    /// Primary count for display as a single absolute value: non-cached input + output.
+    pub fn blended_total(&self) -> u64 {
+        self.non_cached_input() + self.output_tokens
+    }
+
+    /// For estimating what % of the model's context window is used, we need to account
+    /// for reasoning output tokens from prior turns being dropped from the context window.
+    /// We approximate this here by subtracting reasoning output tokens from the total.
+    /// This will be off for the current turn and pending function calls.
+    pub fn tokens_in_context_window(&self) -> u64 {
+        self.total_tokens
+            .saturating_sub(self.reasoning_output_tokens.unwrap_or(0))
+    }
 }
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
@@ -463,17 +485,20 @@ impl From<TokenUsage> for FinalOutput {
 
 impl fmt::Display for FinalOutput {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let u = &self.token_usage;
+        let token_usage = &self.token_usage;
         write!(
             f,
             "Token usage: total={} input={}{} output={}{}",
-            u.total_tokens,
-            u.input_tokens,
-            u.cached_input_tokens
-                .map(|c| format!(" (cached {c})"))
-                .unwrap_or_default(),
-            u.output_tokens,
-            u.reasoning_output_tokens
+            token_usage.blended_total(),
+            token_usage.non_cached_input(),
+            if token_usage.cached_input() > 0 {
+                format!(" (+ {} cached)", token_usage.cached_input())
+            } else {
+                String::new()
+            },
+            token_usage.output_tokens,
+            token_usage
+                .reasoning_output_tokens
                 .map(|r| format!(" (reasoning {r})"))
                 .unwrap_or_default()
         )
diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs
index 6b03ed78..a2ae8131 100644
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -21,7 +21,6 @@ use codex_core::protocol::PatchApplyBeginEvent;
 use codex_core::protocol::PatchApplyEndEvent;
 use codex_core::protocol::SessionConfiguredEvent;
 use codex_core::protocol::TaskCompleteEvent;
-use codex_core::protocol::TokenUsage;
 use codex_core::protocol::TurnDiffEvent;
 use owo_colors::OwoColorize;
 use owo_colors::Style;
@@ -183,8 +182,8 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                 }
                 return CodexStatus::InitiateShutdown;
             }
-            EventMsg::TokenCount(TokenUsage { total_tokens, .. }) => {
-                ts_println!(self, "tokens used: {total_tokens}");
+            EventMsg::TokenCount(token_usage) => {
+                ts_println!(self, "tokens used: {}", token_usage.blended_total());
             }
             EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => {
                 if !self.answer_started {
diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs
index 8df5340f..6beb7975 100644
--- a/codex-rs/tui/src/history_cell.rs
+++ b/codex-rs/tui/src/history_cell.rs
@@ -474,27 +474,17 @@ impl HistoryCell {
         lines.push(Line::from("token usage".bold()));
         lines.push(Line::from(vec![
             " input: ".bold(),
-            usage.input_tokens.to_string().into(),
-        ]));
-        lines.push(Line::from(vec![
-            " cached input: ".bold(),
-            usage.cached_input_tokens.unwrap_or(0).to_string().into(),
+            usage.non_cached_input().to_string().into(),
+            " ".into(),
+            format!("(+ {} cached)", usage.cached_input()).into(),
         ]));
         lines.push(Line::from(vec![
             " output: ".bold(),
             usage.output_tokens.to_string().into(),
         ]));
-        lines.push(Line::from(vec![
-            " reasoning output: ".bold(),
-            usage
-                .reasoning_output_tokens
-                .unwrap_or(0)
-                .to_string()
-                .into(),
-        ]));
         lines.push(Line::from(vec![
             " total: ".bold(),
-            usage.total_tokens.to_string().into(),
+            usage.blended_total().to_string().into(),
         ]));
         lines.push(Line::from(""));
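
Reviewer note (not part of the patch): a minimal standalone sketch of the
arithmetic the new helpers perform. The struct below mirrors the five
TokenUsage fields this patch touches, and all numbers are made up for
illustration.

    // Standalone sketch; mirrors the helpers added in
    // codex-rs/core/src/protocol.rs.
    struct TokenUsage {
        input_tokens: u64,
        cached_input_tokens: Option<u64>,
        output_tokens: u64,
        reasoning_output_tokens: Option<u64>,
        total_tokens: u64,
    }

    impl TokenUsage {
        fn cached_input(&self) -> u64 {
            self.cached_input_tokens.unwrap_or(0)
        }

        fn non_cached_input(&self) -> u64 {
            self.input_tokens.saturating_sub(self.cached_input())
        }

        // Absolute count for display: non-cached input + output.
        fn blended_total(&self) -> u64 {
            self.non_cached_input() + self.output_tokens
        }

        // Context-window estimate: reasoning output from prior turns is
        // dropped from the window, so subtract it from the total.
        fn tokens_in_context_window(&self) -> u64 {
            self.total_tokens
                .saturating_sub(self.reasoning_output_tokens.unwrap_or(0))
        }
    }

    fn main() {
        // Hypothetical turn: 1000 input tokens (800 served from the cache),
        // 200 output tokens (150 of them reasoning), 1200 total.
        let usage = TokenUsage {
            input_tokens: 1_000,
            cached_input_tokens: Some(800),
            output_tokens: 200,
            reasoning_output_tokens: Some(150),
            total_tokens: 1_200,
        };
        assert_eq!(usage.non_cached_input(), 200); // 1000 - 800
        assert_eq!(usage.blended_total(), 400); // 200 + 200
        assert_eq!(usage.tokens_in_context_window(), 1_050); // 1200 - 150
    }

Note that blended_total() ignores cached input entirely, while
tokens_in_context_window() keeps it: cached input is cheaper to serve but
still occupies space in the model's context window.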