feat: show number of tokens remaining in UI (#1388)

When using the OpenAI Responses API, we now record the `usage` field for a `"response.completed"` event, which includes metrics about the number of tokens consumed. We also introduce `openai_model_info.rs`, which includes current data about the most common OpenAI models available via the API (specifically `context_window` and `max_output_tokens`). If Codex does not recognize the model, you can set `model_context_window` and `model_max_output_tokens` explicitly in `config.toml`. When then introduce a new event type to `protocol.rs`, `TokenCount`, which includes the `TokenUsage` for the most recent turn. Finally, we update the TUI to record the running sum of tokens used so the percentage of available context window remaining can be reported via the placeholder text for the composer: ![Screenshot 2025-06-25 at 11 20 55 PM](https://github.com/user-attachments/assets/6fd6982f-7247-4f14-84b2-2e600cb1fd49) We could certainly get much fancier with this (such as reporting the estimated cost of the conversation), but for now, we are just trying to achieve feature parity with the TypeScript CLI. Though arguably this improves upon the TypeScript CLI, as the TypeScript CLI uses heuristics to estimate the number of tokens used rather than using the `usage` information directly: 296996d74e/codex-cli/src/utils/approximate-tokens-used.ts (L3-L16) Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
parent 296996d74e
commit fcfe43c7df
14 changed files with 301 additions and 15 deletions
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -215,6 +215,7 @@ where
                let _ = tx_event
                    .send(Ok(ResponseEvent::Completed {
                        response_id: String::new(),
+                        token_usage: None,
                    }))
                    .await;
                return;
@@ -232,6 +233,7 @@ where
            let _ = tx_event
                .send(Ok(ResponseEvent::Completed {
                    response_id: String::new(),
+                    token_usage: None,
                }))
                .await;
            return;
@@ -317,6 +319,7 @@ where
                let _ = tx_event
                    .send(Ok(ResponseEvent::Completed {
                        response_id: String::new(),
+                        token_usage: None,
                    }))
                    .await;

@@ -394,7 +397,10 @@ where
                    // Not an assistant message – forward immediately.
                    return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
                }
-                Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))) => {
+                Poll::Ready(Some(Ok(ResponseEvent::Completed {
+                    response_id,
+                    token_usage,
+                }))) => {
                    if !this.cumulative.is_empty() {
                        let aggregated_item = crate::models::ResponseItem::Message {
                            role: "assistant".to_string(),
@@ -404,7 +410,10 @@ where
                        };

                        // Buffer Completed so it is returned *after* the aggregated message.
-                        this.pending_completed = Some(ResponseEvent::Completed { response_id });
+                        this.pending_completed = Some(ResponseEvent::Completed {
+                            response_id,
+                            token_usage,
+                        });

                        return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
                            aggregated_item,
@@ -412,7 +421,10 @@ where
                    }

                    // Nothing aggregated – forward Completed directly.
-                    return Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id })));
+                    return Poll::Ready(Some(Ok(ResponseEvent::Completed {
+                        response_id,
+                        token_usage,
+                    })));
                } // No other `Ok` variants exist at the moment, continue polling.
            }
        }
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -35,6 +35,7 @@ use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
 use crate::models::ResponseItem;
 use crate::openai_tools::create_tools_json_for_responses_api;
+use crate::protocol::TokenUsage;
 use crate::util::backoff;

 #[derive(Clone)]
@@ -210,6 +211,38 @@ struct SseEvent {
 #[derive(Debug, Deserialize)]
 struct ResponseCompleted {
    id: String,
+    usage: Option<ResponseCompletedUsage>,
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedUsage {
+    input_tokens: u64,
+    input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
+    output_tokens: u64,
+    output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
+    total_tokens: u64,
+}
+
+impl From<ResponseCompletedUsage> for TokenUsage {
+    fn from(val: ResponseCompletedUsage) -> Self {
+        TokenUsage {
+            input_tokens: val.input_tokens,
+            cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens),
+            output_tokens: val.output_tokens,
+            reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens),
+            total_tokens: val.total_tokens,
+        }
+    }
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedInputTokensDetails {
+    cached_tokens: u64,
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedOutputTokensDetails {
+    reasoning_tokens: u64,
 }

 async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
@@ -221,7 +254,7 @@ where
    // If the stream stays completely silent for an extended period treat it as disconnected.
    let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
    // The response id returned from the "complete" message.
-    let mut response_id = None;
+    let mut response_completed: Option<ResponseCompleted> = None;

    loop {
        let sse = match timeout(idle_timeout, stream.next()).await {
@@ -233,9 +266,15 @@ where
                return;
            }
            Ok(None) => {
-                match response_id {
-                    Some(response_id) => {
-                        let event = ResponseEvent::Completed { response_id };
+                match response_completed {
+                    Some(ResponseCompleted {
+                        id: response_id,
+                        usage,
+                    }) => {
+                        let event = ResponseEvent::Completed {
+                            response_id,
+                            token_usage: usage.map(Into::into),
+                        };
                        let _ = tx_event.send(Ok(event)).await;
                    }
                    None => {
@@ -301,7 +340,7 @@ where
                if let Some(resp_val) = event.response {
                    match serde_json::from_value::<ResponseCompleted>(resp_val) {
                        Ok(r) => {
-                            response_id = Some(r.id);
+                            response_completed = Some(r);
                        }
                        Err(e) => {
                            debug!("failed to parse ResponseCompleted: {e}");
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -2,6 +2,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::error::Result;
 use crate::models::ResponseItem;
+use crate::protocol::TokenUsage;
 use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
 use futures::Stream;
 use serde::Serialize;
@@ -51,7 +52,10 @@ impl Prompt {
 #[derive(Debug)]
 pub enum ResponseEvent {
    OutputItemDone(ResponseItem),
-    Completed { response_id: String },
+    Completed {
+        response_id: String,
+        token_usage: Option<TokenUsage>,
+    },
 }

 #[derive(Debug, Serialize)]
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1078,7 +1078,20 @@ async fn try_run_turn(
                let response = handle_response_item(sess, sub_id, item.clone()).await?;
                output.push(ProcessedResponseItem { item, response });
            }
-            ResponseEvent::Completed { response_id } => {
+            ResponseEvent::Completed {
+                response_id,
+                token_usage,
+            } => {
+                if let Some(token_usage) = token_usage {
+                    sess.tx_event
+                        .send(Event {
+                            id: sub_id.to_string(),
+                            msg: EventMsg::TokenCount(token_usage),
+                        })
+                        .await
+                        .ok();
+                }
+
                let mut state = sess.state.lock().unwrap();
                state.previous_response_id = Some(response_id);
                break;
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -10,6 +10,7 @@ use crate::config_types::UriBasedFileOpener;
 use crate::flags::OPENAI_DEFAULT_MODEL;
 use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::built_in_model_providers;
+use crate::openai_model_info::get_model_info;
 use crate::protocol::AskForApproval;
 use crate::protocol::SandboxPolicy;
 use dirs::home_dir;
@@ -30,6 +31,12 @@ pub struct Config {
    /// Optional override of model selection.
    pub model: String,

+    /// Size of the context window for the model, in tokens.
+    pub model_context_window: Option<u64>,
+
+    /// Maximum number of output tokens.
+    pub model_max_output_tokens: Option<u64>,
+
    /// Key into the model_providers map that specifies which provider to use.
    pub model_provider_id: String,

@@ -234,6 +241,12 @@ pub struct ConfigToml {
    /// Provider to use from the model_providers map.
    pub model_provider: Option<String>,

+    /// Size of the context window for the model, in tokens.
+    pub model_context_window: Option<u64>,
+
+    /// Maximum number of output tokens.
+    pub model_max_output_tokens: Option<u64>,
+
    /// Default approval policy for executing commands.
    pub approval_policy: Option<AskForApproval>,

@@ -387,11 +400,23 @@ impl Config {

        let history = cfg.history.unwrap_or_default();

+        let model = model
+            .or(config_profile.model)
+            .or(cfg.model)
+            .unwrap_or_else(default_model);
+        let openai_model_info = get_model_info(&model);
+        let model_context_window = cfg
+            .model_context_window
+            .or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
+        let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
+            openai_model_info
+                .as_ref()
+                .map(|info| info.max_output_tokens)
+        });
        let config = Self {
-            model: model
-                .or(config_profile.model)
-                .or(cfg.model)
-                .unwrap_or_else(default_model),
+            model,
+            model_context_window,
+            model_max_output_tokens,
            model_provider_id,
            model_provider,
            cwd: resolved_cwd,
@@ -687,6 +712,8 @@ disable_response_storage = true
        assert_eq!(
            Config {
                model: "o3".to_string(),
+                model_context_window: Some(200_000),
+                model_max_output_tokens: Some(100_000),
                model_provider_id: "openai".to_string(),
                model_provider: fixture.openai_provider.clone(),
                approval_policy: AskForApproval::Never,
@@ -729,6 +756,8 @@ disable_response_storage = true
        )?;
        let expected_gpt3_profile_config = Config {
            model: "gpt-3.5-turbo".to_string(),
+            model_context_window: Some(16_385),
+            model_max_output_tokens: Some(4_096),
            model_provider_id: "openai-chat-completions".to_string(),
            model_provider: fixture.openai_chat_completions_provider.clone(),
            approval_policy: AskForApproval::UnlessTrusted,
@@ -786,6 +815,8 @@ disable_response_storage = true
        )?;
        let expected_zdr_profile_config = Config {
            model: "o3".to_string(),
+            model_context_window: Some(200_000),
+            model_max_output_tokens: Some(100_000),
            model_provider_id: "openai".to_string(),
            model_provider: fixture.openai_provider.clone(),
            approval_policy: AskForApproval::OnFailure,
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -28,6 +28,7 @@ pub use model_provider_info::ModelProviderInfo;
 pub use model_provider_info::WireApi;
 mod models;
 pub mod openai_api_key;
+mod openai_model_info;
 mod openai_tools;
 mod project_doc;
 pub mod protocol;
--- a/codex-rs/core/src/openai_model_info.rs
+++ b/codex-rs/core/src/openai_model_info.rs
@@ -0,0 +1,71 @@
+/// Metadata about a model, particularly OpenAI models.
+/// We may want to consider including details like the pricing for
+/// input tokens, output tokens, etc., though users will need to be able to
+/// override this in config.toml, as this information can get out of date.
+/// Though this would help present more accurate pricing information in the UI.
+#[derive(Debug)]
+pub(crate) struct ModelInfo {
+    /// Size of the context window in tokens.
+    pub(crate) context_window: u64,
+
+    /// Maximum number of output tokens that can be generated for the model.
+    pub(crate) max_output_tokens: u64,
+}
+
+/// Note details such as what a model like gpt-4o is aliased to may be out of
+/// date.
+pub(crate) fn get_model_info(name: &str) -> Option<ModelInfo> {
+    match name {
+        // https://platform.openai.com/docs/models/o3
+        "o3" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // https://platform.openai.com/docs/models/o4-mini
+        "o4-mini" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // https://platform.openai.com/docs/models/codex-mini-latest
+        "codex-mini-latest" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
+        // https://platform.openai.com/docs/models/gpt-4.1
+        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
+            context_window: 1_047_576,
+            max_output_tokens: 32_768,
+        }),
+
+        // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
+        // https://platform.openai.com/docs/models/gpt-4o
+        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 16_384,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
+        "gpt-4o-2024-05-13" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 4_096,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
+        "gpt-4o-2024-11-20" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 16_384,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-3.5-turbo
+        "gpt-3.5-turbo" => Some(ModelInfo {
+            context_window: 16_385,
+            max_output_tokens: 4_096,
+        }),
+
+        _ => None,
+    }
+}
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -275,6 +275,10 @@ pub enum EventMsg {
    /// Agent has completed all actions
    TaskComplete(TaskCompleteEvent),

+    /// Token count event, sent periodically to report the number of tokens
+    /// used in the current session.
+    TokenCount(TokenUsage),
+
    /// Agent text output message
    AgentMessage(AgentMessageEvent),

@@ -322,6 +326,15 @@ pub struct TaskCompleteEvent {
    pub last_agent_message: Option<String>,
 }

+#[derive(Debug, Clone, Deserialize, Serialize, Default)]
+pub struct TokenUsage {
+    pub input_tokens: u64,
+    pub cached_input_tokens: Option<u64>,
+    pub output_tokens: u64,
+    pub reasoning_output_tokens: Option<u64>,
+    pub total_tokens: u64,
+}
+
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct AgentMessageEvent {
    pub message: String,