diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs index e61fc0c3..c789798b 100644 --- a/codex-rs/core/src/protocol.rs +++ b/codex-rs/core/src/protocol.rs @@ -448,6 +448,28 @@ impl TokenUsage { pub fn is_zero(&self) -> bool { self.total_tokens == 0 } + + pub fn cached_input(&self) -> u64 { + self.cached_input_tokens.unwrap_or(0) + } + + pub fn non_cached_input(&self) -> u64 { + self.input_tokens.saturating_sub(self.cached_input()) + } + + /// Primary count for display as a single absolute value: non-cached input + output. + pub fn blended_total(&self) -> u64 { + self.non_cached_input() + self.output_tokens + } + + /// For estimating what % of the model's context window is used, we need to account + /// for reasoning output tokens from prior turns being dropped from the context window. + /// We approximate this here by subtracting reasoning output tokens from the total. + /// This will be off for the current turn and pending function calls. + pub fn tokens_in_context_window(&self) -> u64 { + self.total_tokens + .saturating_sub(self.reasoning_output_tokens.unwrap_or(0)) + } } #[derive(Debug, Clone, Deserialize, Serialize)] @@ -463,17 +485,20 @@ impl From for FinalOutput { impl fmt::Display for FinalOutput { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let u = &self.token_usage; + let token_usage = &self.token_usage; write!( f, "Token usage: total={} input={}{} output={}{}", - u.total_tokens, - u.input_tokens, - u.cached_input_tokens - .map(|c| format!(" (cached {c})")) - .unwrap_or_default(), - u.output_tokens, - u.reasoning_output_tokens + token_usage.blended_total(), + token_usage.non_cached_input(), + if token_usage.cached_input() > 0 { + format!(" (+ {} cached)", token_usage.cached_input()) + } else { + String::new() + }, + token_usage.output_tokens, + token_usage + .reasoning_output_tokens .map(|r| format!(" (reasoning {r})")) .unwrap_or_default() ) diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index 6b03ed78..a2ae8131 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -21,7 +21,6 @@ use codex_core::protocol::PatchApplyBeginEvent; use codex_core::protocol::PatchApplyEndEvent; use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::TaskCompleteEvent; -use codex_core::protocol::TokenUsage; use codex_core::protocol::TurnDiffEvent; use owo_colors::OwoColorize; use owo_colors::Style; @@ -183,8 +182,8 @@ impl EventProcessor for EventProcessorWithHumanOutput { } return CodexStatus::InitiateShutdown; } - EventMsg::TokenCount(TokenUsage { total_tokens, .. }) => { - ts_println!(self, "tokens used: {total_tokens}"); + EventMsg::TokenCount(token_usage) => { + ts_println!(self, "tokens used: {}", token_usage.blended_total()); } EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => { if !self.answer_started { diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index 8df5340f..6beb7975 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -474,27 +474,17 @@ impl HistoryCell { lines.push(Line::from("token usage".bold())); lines.push(Line::from(vec![ " input: ".bold(), - usage.input_tokens.to_string().into(), - ])); - lines.push(Line::from(vec![ - " cached input: ".bold(), - usage.cached_input_tokens.unwrap_or(0).to_string().into(), + usage.non_cached_input().to_string().into(), + " ".into(), + format!("(+ {} cached)", usage.cached_input()).into(), ])); lines.push(Line::from(vec![ " output: ".bold(), usage.output_tokens.to_string().into(), ])); - lines.push(Line::from(vec![ - " reasoning output: ".bold(), - usage - .reasoning_output_tokens - .unwrap_or(0) - .to_string() - .into(), - ])); lines.push(Line::from(vec![ " total: ".bold(), - usage.total_tokens.to_string().into(), + usage.blended_total().to_string().into(), ])); lines.push(Line::from(""));