From fcfe43c7df46836a1c60cec4dfd1591d3036a0c8 Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Wed, 25 Jun 2025 23:31:11 -0700 Subject: [PATCH] feat: show number of tokens remaining in UI (#1388) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using the OpenAI Responses API, we now record the `usage` field for a `"response.completed"` event, which includes metrics about the number of tokens consumed. We also introduce `openai_model_info.rs`, which includes current data about the most common OpenAI models available via the API (specifically `context_window` and `max_output_tokens`). If Codex does not recognize the model, you can set `model_context_window` and `model_max_output_tokens` explicitly in `config.toml`. We then introduce a new event type in `protocol.rs`, `TokenCount`, which includes the `TokenUsage` for the most recent turn. Finally, we update the TUI to record the running sum of tokens used so the percentage of available context window remaining can be reported via the placeholder text for the composer: ![Screenshot 2025-06-25 at 11 20 55 PM](https://github.com/user-attachments/assets/6fd6982f-7247-4f14-84b2-2e600cb1fd49) We could certainly get much fancier with this (such as reporting the estimated cost of the conversation), but for now, we are just trying to achieve feature parity with the TypeScript CLI. Arguably, though, this improves upon the TypeScript CLI, which uses heuristics to estimate the number of tokens used rather than reading the `usage` information directly: https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16 Fixes https://github.com/openai/codex/issues/1242 --- codex-rs/config.md | 10 +++ codex-rs/core/src/chat_completions.rs | 18 ++++- codex-rs/core/src/client.rs | 49 +++++++++++-- codex-rs/core/src/client_common.rs | 6 +- codex-rs/core/src/codex.rs | 15 +++- codex-rs/core/src/config.rs | 39 ++++++++-- codex-rs/core/src/lib.rs | 1 + codex-rs/core/src/openai_model_info.rs | 71 +++++++++++++++++++ codex-rs/core/src/protocol.rs | 13 ++++ codex-rs/exec/src/event_processor.rs | 4 ++ codex-rs/mcp-server/src/codex_tool_runner.rs | 1 + codex-rs/tui/src/bottom_pane/chat_composer.rs | 40 ++++++++++- codex-rs/tui/src/bottom_pane/mod.rs | 13 ++++ codex-rs/tui/src/chatwidget.rs | 36 ++++++++++ 14 files changed, 301 insertions(+), 15 deletions(-) create mode 100644 codex-rs/core/src/openai_model_info.rs diff --git a/codex-rs/config.md b/codex-rs/config.md index 14d5fd22..bb8b6716 100644 --- a/codex-rs/config.md +++ b/codex-rs/config.md @@ -407,6 +407,16 @@ Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the hide_agent_reasoning = true # defaults to false ``` +## model_context_window + +The size of the context window for the model, in tokens. + +In general, Codex knows the context window for the most common OpenAI models, but if you are using a new model with an old version of the Codex CLI, then you can use `model_context_window` to tell Codex what value to use to determine how much context is left during a conversation. + +## model_max_output_tokens + +This is analogous to `model_context_window`, but for the maximum number of output tokens for the model. + ## project_doc_max_bytes Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
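As a quick illustration of the two new `config.toml` overrides documented above, a minimal sketch (the values are illustrative and simply mirror the `o3` entry in `openai_model_info.rs`; substitute the limits of your own model):

```toml
# Only needed when Codex does not already know the model's limits.
model_context_window = 200000     # tokens available in the model's context window
model_max_output_tokens = 100000  # maximum tokens the model may generate per response
```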
diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs index f381c72e..12c5b7af 100644 --- a/codex-rs/core/src/chat_completions.rs +++ b/codex-rs/core/src/chat_completions.rs @@ -215,6 +215,7 @@ where let _ = tx_event .send(Ok(ResponseEvent::Completed { response_id: String::new(), + token_usage: None, })) .await; return; @@ -232,6 +233,7 @@ where let _ = tx_event .send(Ok(ResponseEvent::Completed { response_id: String::new(), + token_usage: None, })) .await; return; @@ -317,6 +319,7 @@ where let _ = tx_event .send(Ok(ResponseEvent::Completed { response_id: String::new(), + token_usage: None, })) .await; @@ -394,7 +397,10 @@ where // Not an assistant message – forward immediately. return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item)))); } - Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))) => { + Poll::Ready(Some(Ok(ResponseEvent::Completed { + response_id, + token_usage, + }))) => { if !this.cumulative.is_empty() { let aggregated_item = crate::models::ResponseItem::Message { role: "assistant".to_string(), @@ -404,7 +410,10 @@ }; // Buffer Completed so it is returned *after* the aggregated message. - this.pending_completed = Some(ResponseEvent::Completed { response_id }); + this.pending_completed = Some(ResponseEvent::Completed { + response_id, + token_usage, + }); return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone( aggregated_item, @@ -412,7 +421,10 @@ } // Nothing aggregated – forward Completed directly. - return Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))); + return Poll::Ready(Some(Ok(ResponseEvent::Completed { + response_id, + token_usage, + }))); } // No other `Ok` variants exist at the moment, continue polling. } } diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index aff83888..4770796d 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -35,6 +35,7 @@ use crate::model_provider_info::ModelProviderInfo; use crate::model_provider_info::WireApi; use crate::models::ResponseItem; use crate::openai_tools::create_tools_json_for_responses_api; +use crate::protocol::TokenUsage; use crate::util::backoff; #[derive(Clone)] @@ -210,6 +211,38 @@ struct SseEvent { #[derive(Debug, Deserialize)] struct ResponseCompleted { id: String, + usage: Option<ResponseCompletedUsage>, } + +#[derive(Debug, Deserialize)] +struct ResponseCompletedUsage { + input_tokens: u64, + input_tokens_details: Option<ResponseCompletedInputTokensDetails>, + output_tokens: u64, + output_tokens_details: Option<ResponseCompletedOutputTokensDetails>, + total_tokens: u64, +} + +impl From<ResponseCompletedUsage> for TokenUsage { + fn from(val: ResponseCompletedUsage) -> Self { + TokenUsage { + input_tokens: val.input_tokens, + cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens), + output_tokens: val.output_tokens, + reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens), + total_tokens: val.total_tokens, + } + } +} + +#[derive(Debug, Deserialize)] +struct ResponseCompletedInputTokensDetails { + cached_tokens: u64, +} + +#[derive(Debug, Deserialize)] +struct ResponseCompletedOutputTokensDetails { + reasoning_tokens: u64, +} async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>) @@ -221,7 +254,7 @@ where // If the stream stays completely silent for an extended period treat it as disconnected. let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS; // The response id returned from the "complete" message.
- let mut response_id = None; + let mut response_completed: Option<ResponseCompleted> = None; loop { let sse = match timeout(idle_timeout, stream.next()).await { @@ -233,9 +266,15 @@ where return; } Ok(None) => { - match response_id { - Some(response_id) => { - let event = ResponseEvent::Completed { response_id }; + match response_completed { + Some(ResponseCompleted { + id: response_id, + usage, + }) => { + let event = ResponseEvent::Completed { + response_id, + token_usage: usage.map(Into::into), + }; let _ = tx_event.send(Ok(event)).await; } None => { @@ -301,7 +340,7 @@ where if let Some(resp_val) = event.response { match serde_json::from_value::<ResponseCompleted>(resp_val) { Ok(r) => { - response_id = Some(r.id); + response_completed = Some(r); } Err(e) => { debug!("failed to parse ResponseCompleted: {e}"); diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs index a2633475..e17cf22c 100644 --- a/codex-rs/core/src/client_common.rs +++ b/codex-rs/core/src/client_common.rs @@ -2,6 +2,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig; use crate::config_types::ReasoningSummary as ReasoningSummaryConfig; use crate::error::Result; use crate::models::ResponseItem; +use crate::protocol::TokenUsage; use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS; use futures::Stream; use serde::Serialize; @@ -51,7 +52,10 @@ impl Prompt { #[derive(Debug)] pub enum ResponseEvent { OutputItemDone(ResponseItem), - Completed { response_id: String }, + Completed { + response_id: String, + token_usage: Option<TokenUsage>, + }, } #[derive(Debug, Serialize)] diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index e12a3a60..a43f75a7 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1078,7 +1078,20 @@ async fn try_run_turn( let response = handle_response_item(sess, sub_id, item.clone()).await?; output.push(ProcessedResponseItem { item, response }); } - ResponseEvent::Completed { response_id } => { + ResponseEvent::Completed { + response_id, + token_usage, + } => { + if let Some(token_usage) = token_usage { + sess.tx_event + .send(Event { + id: sub_id.to_string(), + msg: EventMsg::TokenCount(token_usage), + }) + .await + .ok(); + } + let mut state = sess.state.lock().unwrap(); state.previous_response_id = Some(response_id); break; diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs index e01bb3f4..6652d7c7 100644 --- a/codex-rs/core/src/config.rs +++ b/codex-rs/core/src/config.rs @@ -10,6 +10,7 @@ use crate::config_types::UriBasedFileOpener; use crate::flags::OPENAI_DEFAULT_MODEL; use crate::model_provider_info::ModelProviderInfo; use crate::model_provider_info::built_in_model_providers; +use crate::openai_model_info::get_model_info; use crate::protocol::AskForApproval; use crate::protocol::SandboxPolicy; use dirs::home_dir; @@ -30,6 +31,12 @@ pub struct Config { /// Optional override of model selection. pub model: String, + /// Size of the context window for the model, in tokens. + pub model_context_window: Option<u64>, + + /// Maximum number of output tokens. + pub model_max_output_tokens: Option<u64>, + /// Key into the model_providers map that specifies which provider to use. pub model_provider_id: String, @@ -234,6 +241,12 @@ pub struct ConfigToml { /// Provider to use from the model_providers map. pub model_provider: Option<String>, + /// Size of the context window for the model, in tokens. + pub model_context_window: Option<u64>, + + /// Maximum number of output tokens. + pub model_max_output_tokens: Option<u64>, + /// Default approval policy for executing commands.
pub approval_policy: Option<AskForApproval>, @@ -387,11 +400,23 @@ impl Config { let history = cfg.history.unwrap_or_default(); + let model = model + .or(config_profile.model) + .or(cfg.model) + .unwrap_or_else(default_model); + let openai_model_info = get_model_info(&model); + let model_context_window = cfg + .model_context_window + .or_else(|| openai_model_info.as_ref().map(|info| info.context_window)); + let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| { + openai_model_info + .as_ref() + .map(|info| info.max_output_tokens) + }); let config = Self { - model: model - .or(config_profile.model) - .or(cfg.model) - .unwrap_or_else(default_model), + model, + model_context_window, + model_max_output_tokens, model_provider_id, model_provider, cwd: resolved_cwd, @@ -687,6 +712,8 @@ disable_response_storage = true assert_eq!( Config { model: "o3".to_string(), + model_context_window: Some(200_000), + model_max_output_tokens: Some(100_000), model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), approval_policy: AskForApproval::Never, @@ -729,6 +756,8 @@ disable_response_storage = true )?; let expected_gpt3_profile_config = Config { model: "gpt-3.5-turbo".to_string(), + model_context_window: Some(16_385), + model_max_output_tokens: Some(4_096), model_provider_id: "openai-chat-completions".to_string(), model_provider: fixture.openai_chat_completions_provider.clone(), approval_policy: AskForApproval::UnlessTrusted, @@ -786,6 +815,8 @@ disable_response_storage = true )?; let expected_zdr_profile_config = Config { model: "o3".to_string(), + model_context_window: Some(200_000), + model_max_output_tokens: Some(100_000), model_provider_id: "openai".to_string(), model_provider: fixture.openai_provider.clone(), approval_policy: AskForApproval::OnFailure, diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 16cf1905..6812260c 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -28,6 +28,7 @@ pub use model_provider_info::ModelProviderInfo; pub use model_provider_info::WireApi; mod models; pub mod openai_api_key; +mod openai_model_info; mod openai_tools; mod project_doc; pub mod protocol; diff --git a/codex-rs/core/src/openai_model_info.rs b/codex-rs/core/src/openai_model_info.rs new file mode 100644 index 00000000..9ffd831a --- /dev/null +++ b/codex-rs/core/src/openai_model_info.rs @@ -0,0 +1,71 @@ +/// Metadata about a model, particularly OpenAI models. +/// We may want to consider including details like the pricing for +/// input tokens, output tokens, etc., though users will need to be able to +/// override this in config.toml, as this information can get out of date. +/// Though this would help present more accurate pricing information in the UI. +#[derive(Debug)] +pub(crate) struct ModelInfo { +    /// Size of the context window in tokens. +    pub(crate) context_window: u64, + +    /// Maximum number of output tokens that can be generated for the model. +    pub(crate) max_output_tokens: u64, +} + +/// Note details such as what a model like gpt-4o is aliased to may be out of +/// date.
+pub(crate) fn get_model_info(name: &str) -> Option<ModelInfo> { + match name { + // https://platform.openai.com/docs/models/o3 + "o3" => Some(ModelInfo { + context_window: 200_000, + max_output_tokens: 100_000, + }), + + // https://platform.openai.com/docs/models/o4-mini + "o4-mini" => Some(ModelInfo { + context_window: 200_000, + max_output_tokens: 100_000, + }), + + // https://platform.openai.com/docs/models/codex-mini-latest + "codex-mini-latest" => Some(ModelInfo { + context_window: 200_000, + max_output_tokens: 100_000, + }), + + // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14. + // https://platform.openai.com/docs/models/gpt-4.1 + "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo { + context_window: 1_047_576, + max_output_tokens: 32_768, + }), + + // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06. + // https://platform.openai.com/docs/models/gpt-4o + "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo { + context_window: 128_000, + max_output_tokens: 16_384, + }), + + // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13 + "gpt-4o-2024-05-13" => Some(ModelInfo { + context_window: 128_000, + max_output_tokens: 4_096, + }), + + // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20 + "gpt-4o-2024-11-20" => Some(ModelInfo { + context_window: 128_000, + max_output_tokens: 16_384, + }), + + // https://platform.openai.com/docs/models/gpt-3.5-turbo + "gpt-3.5-turbo" => Some(ModelInfo { + context_window: 16_385, + max_output_tokens: 4_096, + }), + + _ => None, + } +} diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs index d4aa7698..fa25a2fe 100644 --- a/codex-rs/core/src/protocol.rs +++ b/codex-rs/core/src/protocol.rs @@ -275,6 +275,10 @@ pub enum EventMsg { /// Agent has completed all actions TaskComplete(TaskCompleteEvent), + /// Token count event, sent periodically to report the number of tokens + /// used in the current session. + TokenCount(TokenUsage), + /// Agent text output message AgentMessage(AgentMessageEvent), @@ -322,6 +326,15 @@ pub struct TaskCompleteEvent { pub last_agent_message: Option<String>, } +#[derive(Debug, Clone, Deserialize, Serialize, Default)] +pub struct TokenUsage { + pub input_tokens: u64, + pub cached_input_tokens: Option<u64>, + pub output_tokens: u64, + pub reasoning_output_tokens: Option<u64>, + pub total_tokens: u64, +} + #[derive(Debug, Clone, Deserialize, Serialize)] pub struct AgentMessageEvent { pub message: String, diff --git a/codex-rs/exec/src/event_processor.rs b/codex-rs/exec/src/event_processor.rs index e2a8bbb2..5320c572 100644 --- a/codex-rs/exec/src/event_processor.rs +++ b/codex-rs/exec/src/event_processor.rs @@ -16,6 +16,7 @@ use codex_core::protocol::McpToolCallEndEvent; use codex_core::protocol::PatchApplyBeginEvent; use codex_core::protocol::PatchApplyEndEvent; use codex_core::protocol::SessionConfiguredEvent; +use codex_core::protocol::TokenUsage; use owo_colors::OwoColorize; use owo_colors::Style; use shlex::try_join; @@ -180,6 +181,9 @@ impl EventProcessor { EventMsg::TaskStarted | EventMsg::TaskComplete(_) => { // Ignore. } + EventMsg::TokenCount(TokenUsage { total_tokens, ..
}) => { + ts_println!(self, "tokens used: {total_tokens}"); + } EventMsg::AgentMessage(AgentMessageEvent { message }) => { ts_println!( self, diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index 67c990b0..796a119e 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -162,6 +162,7 @@ pub async fn run_codex_tool_session( } EventMsg::Error(_) | EventMsg::TaskStarted + | EventMsg::TokenCount(_) | EventMsg::AgentReasoning(_) | EventMsg::McpToolCallBegin(_) | EventMsg::McpToolCallEnd(_) diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs index 1218f76e..4ec82990 100644 --- a/codex-rs/tui/src/bottom_pane/chat_composer.rs +++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs @@ -1,3 +1,4 @@ +use codex_core::protocol::TokenUsage; use crossterm::event::KeyEvent; use ratatui::buffer::Buffer; use ratatui::layout::Alignment; @@ -24,6 +25,8 @@ const MIN_TEXTAREA_ROWS: usize = 1; /// Rows consumed by the border. const BORDER_LINES: u16 = 2; +const BASE_PLACEHOLDER_TEXT: &str = "send a message"; + /// Result returned when the user interacts with the text area. pub enum InputResult { Submitted(String), @@ -40,7 +43,7 @@ pub(crate) struct ChatComposer<'a> { impl ChatComposer<'_> { pub fn new(has_input_focus: bool, app_event_tx: AppEventSender) -> Self { let mut textarea = TextArea::default(); - textarea.set_placeholder_text("send a message"); + textarea.set_placeholder_text(BASE_PLACEHOLDER_TEXT); textarea.set_cursor_line_style(ratatui::style::Style::default()); let mut this = Self { @@ -53,6 +56,41 @@ impl ChatComposer<'_> { this } + /// Update the cached *context-left* percentage and refresh the placeholder + /// text. The UI relies on the placeholder to convey the remaining + /// context when the composer is empty. + pub(crate) fn set_token_usage( + &mut self, + token_usage: TokenUsage, + model_context_window: Option<u64>, + ) { + let placeholder = match (token_usage.total_tokens, model_context_window) { + (total_tokens, Some(context_window)) => { + let percent_remaining: u8 = if context_window > 0 { + // Calculate the percentage of context left. + let percent = 100.0 - (total_tokens as f32 / context_window as f32 * 100.0); + percent.clamp(0.0, 100.0) as u8 + } else { + // If we don't have a context window, we cannot compute the + // percentage. + 100 + }; + if percent_remaining > 25 { + format!("{BASE_PLACEHOLDER_TEXT} — {percent_remaining}% context left") + } else { + format!( + "{BASE_PLACEHOLDER_TEXT} — {percent_remaining}% context left (consider /compact)" + ) + } + } + (total_tokens, None) => { + format!("{BASE_PLACEHOLDER_TEXT} — {total_tokens} tokens used") + } + }; + + self.textarea.set_placeholder_text(placeholder); + } + /// Record the history metadata advertised by `SessionConfiguredEvent` so /// that the composer can navigate cross-session history.
pub(crate) fn set_history_metadata(&mut self, log_id: u64, entry_count: usize) { diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs index c654581c..e3234e99 100644 --- a/codex-rs/tui/src/bottom_pane/mod.rs +++ b/codex-rs/tui/src/bottom_pane/mod.rs @@ -2,6 +2,7 @@ use bottom_pane_view::BottomPaneView; use bottom_pane_view::ConditionalUpdate; +use codex_core::protocol::TokenUsage; use crossterm::event::KeyEvent; use ratatui::buffer::Buffer; use ratatui::layout::Rect; @@ -129,6 +130,18 @@ impl BottomPane<'_> { } } + /// Update the *context-window remaining* indicator in the composer. This + /// is forwarded directly to the underlying `ChatComposer`. + pub(crate) fn set_token_usage( + &mut self, + token_usage: TokenUsage, + model_context_window: Option<u64>, + ) { + self.composer + .set_token_usage(token_usage, model_context_window); + self.request_redraw(); + } + /// Called when the agent requests user approval. pub fn push_approval_request(&mut self, request: ApprovalRequest) { let request = if let Some(view) = self.active_view.as_mut() { diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index bd5197c7..fad72e3a 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -18,6 +18,7 @@ use codex_core::protocol::McpToolCallEndEvent; use codex_core::protocol::Op; use codex_core::protocol::PatchApplyBeginEvent; use codex_core::protocol::TaskCompleteEvent; +use codex_core::protocol::TokenUsage; use crossterm::event::KeyEvent; use ratatui::buffer::Buffer; use ratatui::layout::Constraint; @@ -46,6 +47,7 @@ pub(crate) struct ChatWidget<'a> { input_focus: InputFocus, config: Config, initial_user_message: Option, + token_usage: TokenUsage, } #[derive(Clone, Copy, Eq, PartialEq)] @@ -131,6 +133,7 @@ impl ChatWidget<'_> { initial_prompt.unwrap_or_default(), initial_images, ), + token_usage: TokenUsage::default(), } } @@ -250,6 +253,11 @@ impl ChatWidget<'_> { self.bottom_pane.set_task_running(false); self.request_redraw(); } + EventMsg::TokenCount(token_usage) => { + self.token_usage = add_token_usage(&self.token_usage, &token_usage); + self.bottom_pane + .set_token_usage(self.token_usage.clone(), self.config.model_context_window); + } EventMsg::Error(ErrorEvent { message }) => { self.conversation_history.add_error(message); self.bottom_pane.set_task_running(false); @@ -410,3 +418,31 @@ impl WidgetRef for &ChatWidget<'_> { (&self.bottom_pane).render(chunks[1], buf); } } + +fn add_token_usage(current_usage: &TokenUsage, new_usage: &TokenUsage) -> TokenUsage { + let cached_input_tokens = match ( + current_usage.cached_input_tokens, + new_usage.cached_input_tokens, + ) { + (Some(current), Some(new)) => Some(current + new), + (Some(current), None) => Some(current), + (None, Some(new)) => Some(new), + (None, None) => None, + }; + let reasoning_output_tokens = match ( + current_usage.reasoning_output_tokens, + new_usage.reasoning_output_tokens, + ) { + (Some(current), Some(new)) => Some(current + new), + (Some(current), None) => Some(current), + (None, Some(new)) => Some(new), + (None, None) => None, + }; + TokenUsage { + input_tokens: current_usage.input_tokens + new_usage.input_tokens, + cached_input_tokens, + output_tokens: current_usage.output_tokens + new_usage.output_tokens, + reasoning_output_tokens, + total_tokens: current_usage.total_tokens + new_usage.total_tokens, + } +}