feat: show number of tokens remaining in UI (#1388)

When using the OpenAI Responses API, we now record the `usage` field for a `"response.completed"` event, which includes metrics about the number of tokens consumed. We also introduce `openai_model_info.rs`, which includes current data about the most common OpenAI models available via the API (specifically `context_window` and `max_output_tokens`). If Codex does not recognize the model, you can set `model_context_window` and `model_max_output_tokens` explicitly in `config.toml`. We then introduce a new event type to `protocol.rs`, `TokenCount`, which includes the `TokenUsage` for the most recent turn. Finally, we update the TUI to record the running sum of tokens used so the percentage of available context window remaining can be reported via the placeholder text for the composer: ![Screenshot 2025-06-25 at 11 20 55 PM](https://github.com/user-attachments/assets/6fd6982f-7247-4f14-84b2-2e600cb1fd49) We could certainly get much fancier with this (such as reporting the estimated cost of the conversation), but for now, we are just trying to achieve feature parity with the TypeScript CLI. Though arguably this improves upon the TypeScript CLI, as the TypeScript CLI uses heuristics to estimate the number of tokens used rather than using the `usage` information directly: 296996d74e/codex-cli/src/utils/approximate-tokens-used.ts (L3-L16) Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
parent 296996d74e
commit fcfe43c7df
14 changed files with 301 additions and 15 deletions
--- a/codex-rs/config.md
+++ b/codex-rs/config.md
@@ -407,6 +407,16 @@ Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the
 hide_agent_reasoning = true   # defaults to false
 ```

+## model_context_window
+
+The size of the context window for the model, in tokens.
+
+In general, Codex knows the context window for the most common OpenAI models, but if you are using a new model with an old version of the Codex CLI, then you can use `model_context_window` to tell Codex what value to use to determine how much context is left during a conversation.
+
+## model_max_output_tokens
+
+This is analogous to `model_context_window`, but for the maximum number of output tokens for the model.
+
 ## project_doc_max_bytes

 Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -215,6 +215,7 @@ where
                let _ = tx_event
                    .send(Ok(ResponseEvent::Completed {
                        response_id: String::new(),
+                        token_usage: None,
                    }))
                    .await;
                return;
@@ -232,6 +233,7 @@ where
            let _ = tx_event
                .send(Ok(ResponseEvent::Completed {
                    response_id: String::new(),
+                    token_usage: None,
                }))
                .await;
            return;
@@ -317,6 +319,7 @@ where
                let _ = tx_event
                    .send(Ok(ResponseEvent::Completed {
                        response_id: String::new(),
+                        token_usage: None,
                    }))
                    .await;

@@ -394,7 +397,10 @@ where
                    // Not an assistant message – forward immediately.
                    return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
                }
-                Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))) => {
+                Poll::Ready(Some(Ok(ResponseEvent::Completed {
+                    response_id,
+                    token_usage,
+                }))) => {
                    if !this.cumulative.is_empty() {
                        let aggregated_item = crate::models::ResponseItem::Message {
                            role: "assistant".to_string(),
@@ -404,7 +410,10 @@ where
                        };

                        // Buffer Completed so it is returned *after* the aggregated message.
-                        this.pending_completed = Some(ResponseEvent::Completed { response_id });
+                        this.pending_completed = Some(ResponseEvent::Completed {
+                            response_id,
+                            token_usage,
+                        });

                        return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
                            aggregated_item,
@@ -412,7 +421,10 @@ where
                    }

                    // Nothing aggregated – forward Completed directly.
-                    return Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id })));
+                    return Poll::Ready(Some(Ok(ResponseEvent::Completed {
+                        response_id,
+                        token_usage,
+                    })));
                } // No other `Ok` variants exist at the moment, continue polling.
            }
        }
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -35,6 +35,7 @@ use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
 use crate::models::ResponseItem;
 use crate::openai_tools::create_tools_json_for_responses_api;
+use crate::protocol::TokenUsage;
 use crate::util::backoff;

 #[derive(Clone)]
@@ -210,6 +211,38 @@ struct SseEvent {
 #[derive(Debug, Deserialize)]
 struct ResponseCompleted {
    id: String,
+    usage: Option<ResponseCompletedUsage>,
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedUsage {
+    input_tokens: u64,
+    input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
+    output_tokens: u64,
+    output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
+    total_tokens: u64,
+}
+
+impl From<ResponseCompletedUsage> for TokenUsage {
+    fn from(val: ResponseCompletedUsage) -> Self {
+        TokenUsage {
+            input_tokens: val.input_tokens,
+            cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens),
+            output_tokens: val.output_tokens,
+            reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens),
+            total_tokens: val.total_tokens,
+        }
+    }
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedInputTokensDetails {
+    cached_tokens: u64,
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedOutputTokensDetails {
+    reasoning_tokens: u64,
 }

 async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
@@ -221,7 +254,7 @@ where
    // If the stream stays completely silent for an extended period treat it as disconnected.
    let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
    // The response id returned from the "complete" message.
-    let mut response_id = None;
+    let mut response_completed: Option<ResponseCompleted> = None;

    loop {
        let sse = match timeout(idle_timeout, stream.next()).await {
@@ -233,9 +266,15 @@ where
                return;
            }
            Ok(None) => {
-                match response_id {
-                    Some(response_id) => {
-                        let event = ResponseEvent::Completed { response_id };
+                match response_completed {
+                    Some(ResponseCompleted {
+                        id: response_id,
+                        usage,
+                    }) => {
+                        let event = ResponseEvent::Completed {
+                            response_id,
+                            token_usage: usage.map(Into::into),
+                        };
                        let _ = tx_event.send(Ok(event)).await;
                    }
                    None => {
@@ -301,7 +340,7 @@ where
                if let Some(resp_val) = event.response {
                    match serde_json::from_value::<ResponseCompleted>(resp_val) {
                        Ok(r) => {
-                            response_id = Some(r.id);
+                            response_completed = Some(r);
                        }
                        Err(e) => {
                            debug!("failed to parse ResponseCompleted: {e}");
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -2,6 +2,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::error::Result;
 use crate::models::ResponseItem;
+use crate::protocol::TokenUsage;
 use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
 use futures::Stream;
 use serde::Serialize;
@@ -51,7 +52,10 @@ impl Prompt {
 #[derive(Debug)]
 pub enum ResponseEvent {
    OutputItemDone(ResponseItem),
-    Completed { response_id: String },
+    Completed {
+        response_id: String,
+        token_usage: Option<TokenUsage>,
+    },
 }

 #[derive(Debug, Serialize)]
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1078,7 +1078,20 @@ async fn try_run_turn(
                let response = handle_response_item(sess, sub_id, item.clone()).await?;
                output.push(ProcessedResponseItem { item, response });
            }
-            ResponseEvent::Completed { response_id } => {
+            ResponseEvent::Completed {
+                response_id,
+                token_usage,
+            } => {
+                if let Some(token_usage) = token_usage {
+                    sess.tx_event
+                        .send(Event {
+                            id: sub_id.to_string(),
+                            msg: EventMsg::TokenCount(token_usage),
+                        })
+                        .await
+                        .ok();
+                }
+
                let mut state = sess.state.lock().unwrap();
                state.previous_response_id = Some(response_id);
                break;
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -10,6 +10,7 @@ use crate::config_types::UriBasedFileOpener;
 use crate::flags::OPENAI_DEFAULT_MODEL;
 use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::built_in_model_providers;
+use crate::openai_model_info::get_model_info;
 use crate::protocol::AskForApproval;
 use crate::protocol::SandboxPolicy;
 use dirs::home_dir;
@@ -30,6 +31,12 @@ pub struct Config {
    /// Optional override of model selection.
    pub model: String,

+    /// Size of the context window for the model, in tokens.
+    pub model_context_window: Option<u64>,
+
+    /// Maximum number of output tokens.
+    pub model_max_output_tokens: Option<u64>,
+
    /// Key into the model_providers map that specifies which provider to use.
    pub model_provider_id: String,

@@ -234,6 +241,12 @@ pub struct ConfigToml {
    /// Provider to use from the model_providers map.
    pub model_provider: Option<String>,

+    /// Size of the context window for the model, in tokens.
+    pub model_context_window: Option<u64>,
+
+    /// Maximum number of output tokens.
+    pub model_max_output_tokens: Option<u64>,
+
    /// Default approval policy for executing commands.
    pub approval_policy: Option<AskForApproval>,

@@ -387,11 +400,23 @@ impl Config {

        let history = cfg.history.unwrap_or_default();

+        let model = model
+            .or(config_profile.model)
+            .or(cfg.model)
+            .unwrap_or_else(default_model);
+        let openai_model_info = get_model_info(&model);
+        let model_context_window = cfg
+            .model_context_window
+            .or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
+        let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
+            openai_model_info
+                .as_ref()
+                .map(|info| info.max_output_tokens)
+        });
        let config = Self {
-            model: model
-                .or(config_profile.model)
-                .or(cfg.model)
-                .unwrap_or_else(default_model),
+            model,
+            model_context_window,
+            model_max_output_tokens,
            model_provider_id,
            model_provider,
            cwd: resolved_cwd,
@@ -687,6 +712,8 @@ disable_response_storage = true
        assert_eq!(
            Config {
                model: "o3".to_string(),
+                model_context_window: Some(200_000),
+                model_max_output_tokens: Some(100_000),
                model_provider_id: "openai".to_string(),
                model_provider: fixture.openai_provider.clone(),
                approval_policy: AskForApproval::Never,
@@ -729,6 +756,8 @@ disable_response_storage = true
        )?;
        let expected_gpt3_profile_config = Config {
            model: "gpt-3.5-turbo".to_string(),
+            model_context_window: Some(16_385),
+            model_max_output_tokens: Some(4_096),
            model_provider_id: "openai-chat-completions".to_string(),
            model_provider: fixture.openai_chat_completions_provider.clone(),
            approval_policy: AskForApproval::UnlessTrusted,
@@ -786,6 +815,8 @@ disable_response_storage = true
        )?;
        let expected_zdr_profile_config = Config {
            model: "o3".to_string(),
+            model_context_window: Some(200_000),
+            model_max_output_tokens: Some(100_000),
            model_provider_id: "openai".to_string(),
            model_provider: fixture.openai_provider.clone(),
            approval_policy: AskForApproval::OnFailure,
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -28,6 +28,7 @@ pub use model_provider_info::ModelProviderInfo;
 pub use model_provider_info::WireApi;
 mod models;
 pub mod openai_api_key;
+mod openai_model_info;
 mod openai_tools;
 mod project_doc;
 pub mod protocol;
--- a/codex-rs/core/src/openai_model_info.rs
+++ b/codex-rs/core/src/openai_model_info.rs
@@ -0,0 +1,71 @@
+/// Metadata about a model, particularly OpenAI models.
+/// We may want to consider including details like the pricing for
+/// input tokens, output tokens, etc., though users will need to be able to
+/// override this in config.toml, as this information can get out of date.
+/// Though this would help present more accurate pricing information in the UI.
+#[derive(Debug)]
+pub(crate) struct ModelInfo {
+    /// Size of the context window in tokens.
+    pub(crate) context_window: u64,
+
+    /// Maximum number of output tokens that can be generated for the model.
+    pub(crate) max_output_tokens: u64,
+}
+
+/// Note details such as what a model like gpt-4o is aliased to may be out of
+/// date.
+pub(crate) fn get_model_info(name: &str) -> Option<ModelInfo> {
+    match name {
+        // https://platform.openai.com/docs/models/o3
+        "o3" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // https://platform.openai.com/docs/models/o4-mini
+        "o4-mini" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // https://platform.openai.com/docs/models/codex-mini-latest
+        "codex-mini-latest" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
+        // https://platform.openai.com/docs/models/gpt-4.1
+        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
+            context_window: 1_047_576,
+            max_output_tokens: 32_768,
+        }),
+
+        // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
+        // https://platform.openai.com/docs/models/gpt-4o
+        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 16_384,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
+        "gpt-4o-2024-05-13" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 4_096,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
+        "gpt-4o-2024-11-20" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 16_384,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-3.5-turbo
+        "gpt-3.5-turbo" => Some(ModelInfo {
+            context_window: 16_385,
+            max_output_tokens: 4_096,
+        }),
+
+        _ => None,
+    }
+}
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -275,6 +275,10 @@ pub enum EventMsg {
    /// Agent has completed all actions
    TaskComplete(TaskCompleteEvent),

+    /// Token count event, sent when a model response completes with usage
+    /// data; reports the tokens used by that turn, not a session-wide total.
+    TokenCount(TokenUsage),
+
    /// Agent text output message
    AgentMessage(AgentMessageEvent),

@@ -322,6 +326,15 @@ pub struct TaskCompleteEvent {
    pub last_agent_message: Option<String>,
 }

+#[derive(Debug, Clone, Deserialize, Serialize, Default)]
+pub struct TokenUsage {
+    pub input_tokens: u64,
+    pub cached_input_tokens: Option<u64>,
+    pub output_tokens: u64,
+    pub reasoning_output_tokens: Option<u64>,
+    pub total_tokens: u64,
+}
+
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct AgentMessageEvent {
    pub message: String,
--- a/codex-rs/exec/src/event_processor.rs
+++ b/codex-rs/exec/src/event_processor.rs
@@ -16,6 +16,7 @@ use codex_core::protocol::McpToolCallEndEvent;
 use codex_core::protocol::PatchApplyBeginEvent;
 use codex_core::protocol::PatchApplyEndEvent;
 use codex_core::protocol::SessionConfiguredEvent;
+use codex_core::protocol::TokenUsage;
 use owo_colors::OwoColorize;
 use owo_colors::Style;
 use shlex::try_join;
@@ -180,6 +181,9 @@ impl EventProcessor {
            EventMsg::TaskStarted | EventMsg::TaskComplete(_) => {
                // Ignore.
            }
+            EventMsg::TokenCount(TokenUsage { total_tokens, .. }) => {
+                ts_println!(self, "tokens used: {total_tokens}");
+            }
            EventMsg::AgentMessage(AgentMessageEvent { message }) => {
                ts_println!(
                    self,
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -162,6 +162,7 @@ pub async fn run_codex_tool_session(
                    }
                    EventMsg::Error(_)
                    | EventMsg::TaskStarted
+                    | EventMsg::TokenCount(_)
                    | EventMsg::AgentReasoning(_)
                    | EventMsg::McpToolCallBegin(_)
                    | EventMsg::McpToolCallEnd(_)
--- a/codex-rs/tui/src/bottom_pane/chat_composer.rs
+++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs
@@ -1,3 +1,4 @@
+use codex_core::protocol::TokenUsage;
 use crossterm::event::KeyEvent;
 use ratatui::buffer::Buffer;
 use ratatui::layout::Alignment;
@@ -24,6 +25,8 @@ const MIN_TEXTAREA_ROWS: usize = 1;
 /// Rows consumed by the border.
 const BORDER_LINES: u16 = 2;

+const BASE_PLACEHOLDER_TEXT: &str = "send a message";
+
 /// Result returned when the user interacts with the text area.
 pub enum InputResult {
    Submitted(String),
@@ -40,7 +43,7 @@ pub(crate) struct ChatComposer<'a> {
 impl ChatComposer<'_> {
    pub fn new(has_input_focus: bool, app_event_tx: AppEventSender) -> Self {
        let mut textarea = TextArea::default();
-        textarea.set_placeholder_text("send a message");
+        textarea.set_placeholder_text(BASE_PLACEHOLDER_TEXT);
        textarea.set_cursor_line_style(ratatui::style::Style::default());

        let mut this = Self {
@@ -53,6 +56,41 @@ impl ChatComposer<'_> {
        this
    }

+    /// Update the cached *context-left* percentage and refresh the placeholder
+    /// text. The UI relies on the placeholder to convey the remaining
+    /// context when the composer is empty.
+    pub(crate) fn set_token_usage(
+        &mut self,
+        token_usage: TokenUsage,
+        model_context_window: Option<u64>,
+    ) {
+        let placeholder = match (token_usage.total_tokens, model_context_window) {
+            (total_tokens, Some(context_window)) => {
+                let percent_remaining: u8 = if context_window > 0 {
+                    // Calculate the percentage of context left.
+                    let percent = 100.0 - (total_tokens as f32 / context_window as f32 * 100.0);
+                    percent.clamp(0.0, 100.0) as u8
+                } else {
+                    // A context window of zero makes the ratio undefined, so
+                    // fall back to reporting the full window as remaining.
+                    100
+                };
+                if percent_remaining > 25 {
+                    format!("{BASE_PLACEHOLDER_TEXT} — {percent_remaining}% context left")
+                } else {
+                    format!(
+                        "{BASE_PLACEHOLDER_TEXT} — {percent_remaining}% context left (consider /compact)"
+                    )
+                }
+            }
+            (total_tokens, None) => {
+                format!("{BASE_PLACEHOLDER_TEXT} — {total_tokens} tokens used")
+            }
+        };
+
+        self.textarea.set_placeholder_text(placeholder);
+    }
+
    /// Record the history metadata advertised by `SessionConfiguredEvent` so
    /// that the composer can navigate cross-session history.
    pub(crate) fn set_history_metadata(&mut self, log_id: u64, entry_count: usize) {
--- a/codex-rs/tui/src/bottom_pane/mod.rs
+++ b/codex-rs/tui/src/bottom_pane/mod.rs
@@ -2,6 +2,7 @@

 use bottom_pane_view::BottomPaneView;
 use bottom_pane_view::ConditionalUpdate;
+use codex_core::protocol::TokenUsage;
 use crossterm::event::KeyEvent;
 use ratatui::buffer::Buffer;
 use ratatui::layout::Rect;
@@ -129,6 +130,18 @@ impl BottomPane<'_> {
        }
    }

+    /// Update the *context-window remaining* indicator in the composer. This
+    /// is forwarded directly to the underlying `ChatComposer`.
+    pub(crate) fn set_token_usage(
+        &mut self,
+        token_usage: TokenUsage,
+        model_context_window: Option<u64>,
+    ) {
+        self.composer
+            .set_token_usage(token_usage, model_context_window);
+        self.request_redraw();
+    }
+
    /// Called when the agent requests user approval.
    pub fn push_approval_request(&mut self, request: ApprovalRequest) {
        let request = if let Some(view) = self.active_view.as_mut() {
--- a/codex-rs/tui/src/chatwidget.rs
+++ b/codex-rs/tui/src/chatwidget.rs
@@ -18,6 +18,7 @@ use codex_core::protocol::McpToolCallEndEvent;
 use codex_core::protocol::Op;
 use codex_core::protocol::PatchApplyBeginEvent;
 use codex_core::protocol::TaskCompleteEvent;
+use codex_core::protocol::TokenUsage;
 use crossterm::event::KeyEvent;
 use ratatui::buffer::Buffer;
 use ratatui::layout::Constraint;
@@ -46,6 +47,7 @@ pub(crate) struct ChatWidget<'a> {
    input_focus: InputFocus,
    config: Config,
    initial_user_message: Option<UserMessage>,
+    token_usage: TokenUsage,
 }

 #[derive(Clone, Copy, Eq, PartialEq)]
@@ -131,6 +133,7 @@ impl ChatWidget<'_> {
                initial_prompt.unwrap_or_default(),
                initial_images,
            ),
+            token_usage: TokenUsage::default(),
        }
    }

@@ -250,6 +253,11 @@ impl ChatWidget<'_> {
                self.bottom_pane.set_task_running(false);
                self.request_redraw();
            }
+            EventMsg::TokenCount(token_usage) => {
+                self.token_usage = add_token_usage(&self.token_usage, &token_usage);
+                self.bottom_pane
+                    .set_token_usage(self.token_usage.clone(), self.config.model_context_window);
+            }
            EventMsg::Error(ErrorEvent { message }) => {
                self.conversation_history.add_error(message);
                self.bottom_pane.set_task_running(false);
@@ -410,3 +418,31 @@ impl WidgetRef for &ChatWidget<'_> {
        (&self.bottom_pane).render(chunks[1], buf);
    }
 }
+
+fn add_token_usage(current_usage: &TokenUsage, new_usage: &TokenUsage) -> TokenUsage {
+    let cached_input_tokens = match (
+        current_usage.cached_input_tokens,
+        new_usage.cached_input_tokens,
+    ) {
+        (Some(current), Some(new)) => Some(current + new),
+        (Some(current), None) => Some(current),
+        (None, Some(new)) => Some(new),
+        (None, None) => None,
+    };
+    let reasoning_output_tokens = match (
+        current_usage.reasoning_output_tokens,
+        new_usage.reasoning_output_tokens,
+    ) {
+        (Some(current), Some(new)) => Some(current + new),
+        (Some(current), None) => Some(current),
+        (None, Some(new)) => Some(new),
+        (None, None) => None,
+    };
+    TokenUsage {
+        input_tokens: current_usage.input_tokens + new_usage.input_tokens,
+        cached_input_tokens,
+        output_tokens: current_usage.output_tokens + new_usage.output_tokens,
+        reasoning_output_tokens,
+        total_tokens: current_usage.total_tokens + new_usage.total_tokens,
+    }
+}