diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
index e61fc0c3..c789798b 100644
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -448,6 +448,28 @@ impl TokenUsage {
     pub fn is_zero(&self) -> bool {
         self.total_tokens == 0
     }
+
+    pub fn cached_input(&self) -> u64 {
+        self.cached_input_tokens.unwrap_or_default() // a missing count reads as zero
+    }
+
+    pub fn non_cached_input(&self) -> u64 {
+        u64::saturating_sub(self.input_tokens, self.cached_input()) // floors at zero
+    }
+
+    /// Headline figure for display: non-cached input + output, clamped at `u64::MAX`.
+    pub fn blended_total(&self) -> u64 {
+        self.non_cached_input().saturating_add(self.output_tokens)
+    }
+
+    /// Estimates how many tokens currently occupy the model's context window.
+    /// Reasoning output from earlier turns is dropped from the context, so it is
+    /// approximated here as the total minus all reasoning output tokens. The figure
+    /// is inexact for the current turn and for pending function calls.
+    pub fn tokens_in_context_window(&self) -> u64 {
+        let dropped_reasoning = self.reasoning_output_tokens.unwrap_or(0);
+        self.total_tokens.saturating_sub(dropped_reasoning)
+    }
 }
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
@@ -463,17 +485,20 @@ impl From<TokenUsage> for FinalOutput {
 
 impl fmt::Display for FinalOutput {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let u = &self.token_usage;
+        let token_usage = &self.token_usage;
         write!(
             f,
             "Token usage: total={} input={}{} output={}{}",
-            u.total_tokens,
-            u.input_tokens,
-            u.cached_input_tokens
-                .map(|c| format!(" (cached {c})"))
-                .unwrap_or_default(),
-            u.output_tokens,
-            u.reasoning_output_tokens
+            token_usage.blended_total(),
+            token_usage.non_cached_input(),
+            if token_usage.cached_input() > 0 {
+                format!(" (+ {} cached)", token_usage.cached_input())
+            } else {
+                String::new()
+            },
+            token_usage.output_tokens,
+            token_usage
+                .reasoning_output_tokens
                 .map(|r| format!(" (reasoning {r})"))
                 .unwrap_or_default()
         )
diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs
index 6b03ed78..a2ae8131 100644
--- a/codex-rs/exec/src/event_processor_with_human_output.rs
+++ b/codex-rs/exec/src/event_processor_with_human_output.rs
@@ -21,7 +21,6 @@ use codex_core::protocol::PatchApplyBeginEvent;
 use codex_core::protocol::PatchApplyEndEvent;
 use codex_core::protocol::SessionConfiguredEvent;
 use codex_core::protocol::TaskCompleteEvent;
-use codex_core::protocol::TokenUsage;
 use codex_core::protocol::TurnDiffEvent;
 use owo_colors::OwoColorize;
 use owo_colors::Style;
@@ -183,8 +182,8 @@ impl EventProcessor for EventProcessorWithHumanOutput {
                 }
                 return CodexStatus::InitiateShutdown;
             }
-            EventMsg::TokenCount(TokenUsage { total_tokens, .. }) => {
-                ts_println!(self, "tokens used: {total_tokens}");
+            EventMsg::TokenCount(token_usage) => {
+                ts_println!(self, "tokens used: {}", token_usage.blended_total());
             }
             EventMsg::AgentMessageDelta(AgentMessageDeltaEvent { delta }) => {
                 if !self.answer_started {
diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs
index 8df5340f..6beb7975 100644
--- a/codex-rs/tui/src/history_cell.rs
+++ b/codex-rs/tui/src/history_cell.rs
@@ -474,27 +474,17 @@ impl HistoryCell {
         lines.push(Line::from("token usage".bold()));
         lines.push(Line::from(vec![
             "  input: ".bold(),
-            usage.input_tokens.to_string().into(),
-        ]));
-        lines.push(Line::from(vec![
-            "  cached input: ".bold(),
-            usage.cached_input_tokens.unwrap_or(0).to_string().into(),
+            usage.non_cached_input().to_string().into(),
+            " ".into(),
+            format!("(+ {} cached)", usage.cached_input()).into(),
         ]));
         lines.push(Line::from(vec![
             "  output: ".bold(),
             usage.output_tokens.to_string().into(),
         ]));
-        lines.push(Line::from(vec![
-            "  reasoning output: ".bold(),
-            usage
-                .reasoning_output_tokens
-                .unwrap_or(0)
-                .to_string()
-                .into(),
-        ]));
         lines.push(Line::from(vec![
             "  total: ".bold(),
-            usage.total_tokens.to_string().into(),
+            usage.blended_total().to_string().into(),
         ]));
 
         lines.push(Line::from(""));