diff --git a/codex-rs/config.md b/codex-rs/config.md
index 14d5fd22..bb8b6716 100644
--- a/codex-rs/config.md
+++ b/codex-rs/config.md
@@ -407,6 +407,16 @@ Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the
 hide_agent_reasoning = true # defaults to false
 ```
 
+## model_context_window
+
+The size of the context window for the model, in tokens.
+
+In general, Codex knows the context window for the most common OpenAI models, but if you are using a new model with an older version of the Codex CLI, you can use `model_context_window` to tell Codex what value to use to determine how much context is left during a conversation.
+
+## model_max_output_tokens
+
+This is analogous to `model_context_window`, but for the maximum number of output tokens for the model.
+
 ## project_doc_max_bytes
 
 Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs
index f381c72e..12c5b7af 100644
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -215,6 +215,7 @@ where
                 let _ = tx_event
                     .send(Ok(ResponseEvent::Completed {
                         response_id: String::new(),
+                        token_usage: None,
                     }))
                     .await;
                 return;
@@ -232,6 +233,7 @@ where
                 let _ = tx_event
                     .send(Ok(ResponseEvent::Completed {
                         response_id: String::new(),
+                        token_usage: None,
                     }))
                     .await;
                 return;
@@ -317,6 +319,7 @@ where
     let _ = tx_event
         .send(Ok(ResponseEvent::Completed {
             response_id: String::new(),
+            token_usage: None,
         }))
        .await;

@@ -394,7 +397,10 @@
                     // Not an assistant message – forward immediately.
                     return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
                 }
-                Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))) => {
+                Poll::Ready(Some(Ok(ResponseEvent::Completed {
+                    response_id,
+                    token_usage,
+                }))) => {
                     if !this.cumulative.is_empty() {
                         let aggregated_item = crate::models::ResponseItem::Message {
                             role: "assistant".to_string(),
@@ -404,7 +410,10 @@
                         };

                         // Buffer Completed so it is returned *after* the aggregated message.
-                        this.pending_completed = Some(ResponseEvent::Completed { response_id });
+                        this.pending_completed = Some(ResponseEvent::Completed {
+                            response_id,
+                            token_usage,
+                        });

                         return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
                             aggregated_item,
@@ -412,7 +421,10 @@
                     }
                     // Nothing aggregated – forward Completed directly.
-                    return Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id })));
+                    return Poll::Ready(Some(Ok(ResponseEvent::Completed {
+                        response_id,
+                        token_usage,
+                    })));
                 }
                 // No other `Ok` variants exist at the moment, continue polling.
                 }
             }
diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs
index aff83888..4770796d 100644
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -35,6 +35,7 @@ use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
 use crate::models::ResponseItem;
 use crate::openai_tools::create_tools_json_for_responses_api;
+use crate::protocol::TokenUsage;
 use crate::util::backoff;

 #[derive(Clone)]
@@ -210,6 +211,38 @@ struct SseEvent {
 #[derive(Debug, Deserialize)]
 struct ResponseCompleted {
     id: String,
+    usage: Option<ResponseCompletedUsage>,
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedUsage {
+    input_tokens: u64,
+    input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
+    output_tokens: u64,
+    output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
+    total_tokens: u64,
+}
+
+impl From<ResponseCompletedUsage> for TokenUsage {
+    fn from(val: ResponseCompletedUsage) -> Self {
+        TokenUsage {
+            input_tokens: val.input_tokens,
+            cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens),
+            output_tokens: val.output_tokens,
+            reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens),
+            total_tokens: val.total_tokens,
+        }
+    }
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedInputTokensDetails {
+    cached_tokens: u64,
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedOutputTokensDetails {
+    reasoning_tokens: u64,
 }

 async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
@@ -221,7 +254,7 @@ where
     // If the stream stays completely silent for an extended period treat it as disconnected.
     let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
     // The response id returned from the "complete" message.
-    let mut response_id = None;
+    let mut response_completed: Option<ResponseCompleted> = None;

     loop {
         let sse = match timeout(idle_timeout, stream.next()).await {
@@ -233,9 +266,15 @@ where
                 return;
             }
             Ok(None) => {
-                match response_id {
-                    Some(response_id) => {
-                        let event = ResponseEvent::Completed { response_id };
+                match response_completed {
+                    Some(ResponseCompleted {
+                        id: response_id,
+                        usage,
+                    }) => {
+                        let event = ResponseEvent::Completed {
+                            response_id,
+                            token_usage: usage.map(Into::into),
+                        };
                         let _ = tx_event.send(Ok(event)).await;
                     }
                     None => {
@@ -301,7 +340,7 @@ where
             if let Some(resp_val) = event.response {
                 match serde_json::from_value::<ResponseCompleted>(resp_val) {
                     Ok(r) => {
-                        response_id = Some(r.id);
+                        response_completed = Some(r);
                     }
                     Err(e) => {
                         debug!("failed to parse ResponseCompleted: {e}");
diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs
index a2633475..e17cf22c 100644
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -2,6 +2,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::error::Result;
 use crate::models::ResponseItem;
+use crate::protocol::TokenUsage;
 use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
 use futures::Stream;
 use serde::Serialize;
@@ -51,7 +52,10 @@ impl Prompt {
 #[derive(Debug)]
 pub enum ResponseEvent {
     OutputItemDone(ResponseItem),
-    Completed { response_id: String },
+    Completed {
+        response_id: String,
+        token_usage: Option<TokenUsage>,
+    },
 }

 #[derive(Debug, Serialize)]
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index e12a3a60..a43f75a7 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1078,7 +1078,20 @@ async fn try_run_turn(
                 let response = handle_response_item(sess, sub_id, item.clone()).await?;
                 output.push(ProcessedResponseItem { item, response });
             }
-            ResponseEvent::Completed { response_id } => {
+            ResponseEvent::Completed {
+                response_id,
+                token_usage,
+            } => {
+                if let Some(token_usage) = token_usage {
+                    sess.tx_event
+                        .send(Event {
+                            id: sub_id.to_string(),
+                            msg: EventMsg::TokenCount(token_usage),
+                        })
+                        .await
+                        .ok();
+                }
+
                 let mut state = sess.state.lock().unwrap();
                 state.previous_response_id = Some(response_id);
                 break;
diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs
index e01bb3f4..6652d7c7 100644
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -10,6 +10,7 @@ use crate::config_types::UriBasedFileOpener;
 use crate::flags::OPENAI_DEFAULT_MODEL;
 use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::built_in_model_providers;
+use crate::openai_model_info::get_model_info;
 use crate::protocol::AskForApproval;
 use crate::protocol::SandboxPolicy;
 use dirs::home_dir;
@@ -30,6 +31,12 @@ pub struct Config {
     /// Optional override of model selection.
     pub model: String,

+    /// Size of the context window for the model, in tokens.
+    pub model_context_window: Option<u64>,
+
+    /// Maximum number of output tokens.
+    pub model_max_output_tokens: Option<u64>,
+
     /// Key into the model_providers map that specifies which provider to use.
     pub model_provider_id: String,

@@ -234,6 +241,12 @@ pub struct ConfigToml {
     /// Provider to use from the model_providers map.
     pub model_provider: Option<String>,

+    /// Size of the context window for the model, in tokens.
+    pub model_context_window: Option<u64>,
+
+    /// Maximum number of output tokens.
+    pub model_max_output_tokens: Option<u64>,
+
     /// Default approval policy for executing commands.
     pub approval_policy: Option<AskForApproval>,

@@ -387,11 +400,23 @@
         let history = cfg.history.unwrap_or_default();

+        let model = model
+            .or(config_profile.model)
+            .or(cfg.model)
+            .unwrap_or_else(default_model);
+        let openai_model_info = get_model_info(&model);
+        let model_context_window = cfg
+            .model_context_window
+            .or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
+        let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
+            openai_model_info
+                .as_ref()
+                .map(|info| info.max_output_tokens)
+        });
         let config = Self {
-            model: model
-                .or(config_profile.model)
-                .or(cfg.model)
-                .unwrap_or_else(default_model),
+            model,
+            model_context_window,
+            model_max_output_tokens,
             model_provider_id,
             model_provider,
             cwd: resolved_cwd,
@@ -687,6 +712,8 @@ disable_response_storage = true
         assert_eq!(
             Config {
                 model: "o3".to_string(),
+                model_context_window: Some(200_000),
+                model_max_output_tokens: Some(100_000),
                 model_provider_id: "openai".to_string(),
                 model_provider: fixture.openai_provider.clone(),
                 approval_policy: AskForApproval::Never,
@@ -729,6 +756,8 @@ disable_response_storage = true
         )?;
         let expected_gpt3_profile_config = Config {
             model: "gpt-3.5-turbo".to_string(),
+            model_context_window: Some(16_385),
+            model_max_output_tokens: Some(4_096),
             model_provider_id: "openai-chat-completions".to_string(),
             model_provider: fixture.openai_chat_completions_provider.clone(),
             approval_policy: AskForApproval::UnlessTrusted,
@@ -786,6 +815,8 @@ disable_response_storage = true
         )?;
         let expected_zdr_profile_config = Config {
             model: "o3".to_string(),
+            model_context_window: Some(200_000),
+            model_max_output_tokens: Some(100_000),
             model_provider_id: "openai".to_string(),
             model_provider: fixture.openai_provider.clone(),
             approval_policy: AskForApproval::OnFailure,
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 16cf1905..6812260c 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -28,6 +28,7 @@ pub use model_provider_info::ModelProviderInfo;
 pub use model_provider_info::WireApi;
 mod models;
 pub mod openai_api_key;
+mod openai_model_info;
 mod openai_tools;
 mod project_doc;
 pub mod protocol;
diff --git a/codex-rs/core/src/openai_model_info.rs b/codex-rs/core/src/openai_model_info.rs
new file mode 100644
index 00000000..9ffd831a
--- /dev/null
+++ b/codex-rs/core/src/openai_model_info.rs
@@ -0,0 +1,71 @@
+/// Metadata about a model, particularly OpenAI models.
+/// We may eventually want to include details such as per-token pricing,
+/// which would let the UI present more accurate cost information, though
+/// users will need to be able to override those values in config.toml
+/// because they can get out of date.
+#[derive(Debug)]
+pub(crate) struct ModelInfo {
+    /// Size of the context window in tokens.
+    pub(crate) context_window: u64,
+
+    /// Maximum number of output tokens that can be generated for the model.
+    pub(crate) max_output_tokens: u64,
+}
+
+/// Note that details such as what a model like gpt-4o is aliased to may be
+/// out of date.
+pub(crate) fn get_model_info(name: &str) -> Option<ModelInfo> {
+    match name {
+        // https://platform.openai.com/docs/models/o3
+        "o3" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // https://platform.openai.com/docs/models/o4-mini
+        "o4-mini" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // https://platform.openai.com/docs/models/codex-mini-latest
+        "codex-mini-latest" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
+        // https://platform.openai.com/docs/models/gpt-4.1
+        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
+            context_window: 1_047_576,
+            max_output_tokens: 32_768,
+        }),
+
+        // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
+        // https://platform.openai.com/docs/models/gpt-4o
+        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 16_384,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
+        "gpt-4o-2024-05-13" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 4_096,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
+        "gpt-4o-2024-11-20" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 16_384,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-3.5-turbo
+        "gpt-3.5-turbo" => Some(ModelInfo {
+            context_window: 16_385,
+            max_output_tokens: 4_096,
+        }),
+
+        _ => None,
+    }
+}
diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
index d4aa7698..fa25a2fe 100644
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -275,6 +275,10 @@ pub enum EventMsg {
     /// Agent has completed all actions
     TaskComplete(TaskCompleteEvent),

+    /// Token count event, sent periodically to report the number of tokens
+    /// used in the current session.
+    TokenCount(TokenUsage),
+
     /// Agent text output message
     AgentMessage(AgentMessageEvent),

@@ -322,6 +326,15 @@ pub struct TaskCompleteEvent {
     pub last_agent_message: Option<String>,
 }

+#[derive(Debug, Clone, Deserialize, Serialize, Default)]
+pub struct TokenUsage {
+    pub input_tokens: u64,
+    pub cached_input_tokens: Option<u64>,
+    pub output_tokens: u64,
+    pub reasoning_output_tokens: Option<u64>,
+    pub total_tokens: u64,
+}
+
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct AgentMessageEvent {
     pub message: String,
diff --git a/codex-rs/exec/src/event_processor.rs b/codex-rs/exec/src/event_processor.rs
index e2a8bbb2..5320c572 100644
--- a/codex-rs/exec/src/event_processor.rs
+++ b/codex-rs/exec/src/event_processor.rs
@@ -16,6 +16,7 @@ use codex_core::protocol::McpToolCallEndEvent;
 use codex_core::protocol::PatchApplyBeginEvent;
 use codex_core::protocol::PatchApplyEndEvent;
 use codex_core::protocol::SessionConfiguredEvent;
+use codex_core::protocol::TokenUsage;
 use owo_colors::OwoColorize;
 use owo_colors::Style;
 use shlex::try_join;
@@ -180,6 +181,9 @@ impl EventProcessor {
             EventMsg::TaskStarted | EventMsg::TaskComplete(_) => {
                 // Ignore.
             }
+            EventMsg::TokenCount(TokenUsage { total_tokens, .. }) => {
+                ts_println!(self, "tokens used: {total_tokens}");
+            }
             EventMsg::AgentMessage(AgentMessageEvent { message }) => {
                 ts_println!(
                     self,
diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs
index 67c990b0..796a119e 100644
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -162,6 +162,7 @@ pub async fn run_codex_tool_session(
             }
             EventMsg::Error(_)
             | EventMsg::TaskStarted
+            | EventMsg::TokenCount(_)
             | EventMsg::AgentReasoning(_)
             | EventMsg::McpToolCallBegin(_)
             | EventMsg::McpToolCallEnd(_)
diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs
index 1218f76e..4ec82990 100644
--- a/codex-rs/tui/src/bottom_pane/chat_composer.rs
+++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs
@@ -1,3 +1,4 @@
+use codex_core::protocol::TokenUsage;
 use crossterm::event::KeyEvent;
 use ratatui::buffer::Buffer;
 use ratatui::layout::Alignment;
@@ -24,6 +25,8 @@ const MIN_TEXTAREA_ROWS: usize = 1;
 /// Rows consumed by the border.
 const BORDER_LINES: u16 = 2;

+const BASE_PLACEHOLDER_TEXT: &str = "send a message";
+
 /// Result returned when the user interacts with the text area.
 pub enum InputResult {
     Submitted(String),
@@ -40,7 +43,7 @@ pub(crate) struct ChatComposer<'a> {
 impl ChatComposer<'_> {
     pub fn new(has_input_focus: bool, app_event_tx: AppEventSender) -> Self {
         let mut textarea = TextArea::default();
-        textarea.set_placeholder_text("send a message");
+        textarea.set_placeholder_text(BASE_PLACEHOLDER_TEXT);
         textarea.set_cursor_line_style(ratatui::style::Style::default());

         let mut this = Self {
@@ -53,6 +56,41 @@ impl ChatComposer<'_> {
         this
     }

+    /// Update the cached *context-left* percentage and refresh the placeholder
+    /// text. The UI relies on the placeholder to convey the remaining
+    /// context when the composer is empty.
+    pub(crate) fn set_token_usage(
+        &mut self,
+        token_usage: TokenUsage,
+        model_context_window: Option<u64>,
+    ) {
+        let placeholder = match (token_usage.total_tokens, model_context_window) {
+            (total_tokens, Some(context_window)) => {
+                let percent_remaining: u8 = if context_window > 0 {
+                    // Calculate the percentage of context left.
+                    let percent = 100.0 - (total_tokens as f32 / context_window as f32 * 100.0);
+                    percent.clamp(0.0, 100.0) as u8
+                } else {
+                    // If we don't have a context window, we cannot compute the
+                    // percentage.
+                    100
+                };
+                if percent_remaining > 25 {
+                    format!("{BASE_PLACEHOLDER_TEXT} — {percent_remaining}% context left")
+                } else {
+                    format!(
+                        "{BASE_PLACEHOLDER_TEXT} — {percent_remaining}% context left (consider /compact)"
+                    )
+                }
+            }
+            (total_tokens, None) => {
+                format!("{BASE_PLACEHOLDER_TEXT} — {total_tokens} tokens used")
+            }
+        };
+
+        self.textarea.set_placeholder_text(placeholder);
+    }
+
     /// Record the history metadata advertised by `SessionConfiguredEvent` so
     /// that the composer can navigate cross-session history.
     pub(crate) fn set_history_metadata(&mut self, log_id: u64, entry_count: usize) {
diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs
index c654581c..e3234e99 100644
--- a/codex-rs/tui/src/bottom_pane/mod.rs
+++ b/codex-rs/tui/src/bottom_pane/mod.rs
@@ -2,6 +2,7 @@
 
 use bottom_pane_view::BottomPaneView;
 use bottom_pane_view::ConditionalUpdate;
+use codex_core::protocol::TokenUsage;
 use crossterm::event::KeyEvent;
 use ratatui::buffer::Buffer;
 use ratatui::layout::Rect;
@@ -129,6 +130,18 @@ impl BottomPane<'_> {
         }
     }

+    /// Update the *context-window remaining* indicator in the composer. This
+    /// is forwarded directly to the underlying `ChatComposer`.
+    pub(crate) fn set_token_usage(
+        &mut self,
+        token_usage: TokenUsage,
+        model_context_window: Option<u64>,
+    ) {
+        self.composer
+            .set_token_usage(token_usage, model_context_window);
+        self.request_redraw();
+    }
+
     /// Called when the agent requests user approval.
     pub fn push_approval_request(&mut self, request: ApprovalRequest) {
         let request = if let Some(view) = self.active_view.as_mut() {
diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs
index bd5197c7..fad72e3a 100644
--- a/codex-rs/tui/src/chatwidget.rs
+++ b/codex-rs/tui/src/chatwidget.rs
@@ -18,6 +18,7 @@ use codex_core::protocol::McpToolCallEndEvent;
 use codex_core::protocol::Op;
 use codex_core::protocol::PatchApplyBeginEvent;
 use codex_core::protocol::TaskCompleteEvent;
+use codex_core::protocol::TokenUsage;
 use crossterm::event::KeyEvent;
 use ratatui::buffer::Buffer;
 use ratatui::layout::Constraint;
@@ -46,6 +47,7 @@ pub(crate) struct ChatWidget<'a> {
     input_focus: InputFocus,
     config: Config,
     initial_user_message: Option,
+    token_usage: TokenUsage,
 }

 #[derive(Clone, Copy, Eq, PartialEq)]
@@ -131,6 +133,7 @@ impl ChatWidget<'_> {
                 initial_prompt.unwrap_or_default(),
                 initial_images,
             ),
+            token_usage: TokenUsage::default(),
         }
     }

@@ -250,6 +253,11 @@ impl ChatWidget<'_> {
                 self.bottom_pane.set_task_running(false);
                 self.request_redraw();
             }
+            EventMsg::TokenCount(token_usage) => {
+                self.token_usage = add_token_usage(&self.token_usage, &token_usage);
+                self.bottom_pane
+                    .set_token_usage(self.token_usage.clone(), self.config.model_context_window);
+            }
             EventMsg::Error(ErrorEvent { message }) => {
                 self.conversation_history.add_error(message);
                 self.bottom_pane.set_task_running(false);
@@ -410,3 +418,31 @@ impl WidgetRef for &ChatWidget<'_> {
         (&self.bottom_pane).render(chunks[1], buf);
     }
 }
+
+fn add_token_usage(current_usage: &TokenUsage, new_usage: &TokenUsage) -> TokenUsage {
+    let cached_input_tokens = match (
+        current_usage.cached_input_tokens,
+        new_usage.cached_input_tokens,
+    ) {
+        (Some(current), Some(new)) => Some(current + new),
+        (Some(current), None) => Some(current),
+        (None, Some(new)) => Some(new),
+        (None, None) => None,
+    };
+    let reasoning_output_tokens = match (
+        current_usage.reasoning_output_tokens,
+        new_usage.reasoning_output_tokens,
+    ) {
+        (Some(current), Some(new)) => Some(current + new),
+        (Some(current), None) => Some(current),
+        (None, Some(new)) => Some(new),
+        (None, None) => None,
+    };
+    TokenUsage {
+        input_tokens: current_usage.input_tokens + new_usage.input_tokens,
+        cached_input_tokens,
+        output_tokens: current_usage.output_tokens + new_usage.output_tokens,
+        reasoning_output_tokens,
+        total_tokens: current_usage.total_tokens + new_usage.total_tokens,
+    }
+}
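For quick reference, the two options documented in the `config.md` change above are set in `config.toml` as shown below. This is only an illustrative sketch: the numbers are the `o3` entry from the new `openai_model_info.rs` table, and overriding them by hand is only needed when Codex does not already know the model in use.

```toml
# Illustrative config.toml override (values taken from the o3 entry in this diff).
model = "o3"
model_context_window = 200_000    # tokens available to the conversation
model_max_output_tokens = 100_000 # upper bound on tokens generated per response
```

With a context window available, the TUI composer placeholder reports the percentage of context left and suggests `/compact` once that drops to 25% or below; without one, it falls back to showing the raw total of tokens used.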