feat: show number of tokens remaining in UI (#1388)

When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
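
For a model Codex does not recognize, the override might look like this in `config.toml` (the key names are the ones this change reads; the model name and numbers are purely illustrative):

```toml
# Hypothetical values for a model Codex has no built-in data for.
model = "some-future-model"        # illustrative model name
model_context_window = 200000      # total tokens the model can attend to
model_max_output_tokens = 100000   # cap on tokens in a single response
```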

We then introduce a new event type in `protocol.rs`, `TokenCount`,
which carries the `TokenUsage` for the most recent turn.
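
Sketched out, the new protocol pieces look roughly like this; the `TokenUsage` field names are taken from the `From` impl in the diff below, while the exact shape of the `TokenCount` variant is an assumption:

```rust
/// Token counts reported by the `usage` field of a
/// `"response.completed"` event (sketch; serde derives omitted).
pub struct TokenUsage {
    pub input_tokens: u64,
    pub cached_input_tokens: Option<u64>,
    pub output_tokens: u64,
    pub reasoning_output_tokens: Option<u64>,
    pub total_tokens: u64,
}

pub enum EventMsg {
    // ...existing variants elided...
    /// Usage for the most recent turn (assumed variant shape).
    TokenCount(TokenUsage),
}
```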

Finally, we update the TUI to keep a running sum of the tokens used so
that the percentage of the context window still available can be
reported via the composer's placeholder text:

![Screenshot 2025-06-25 at 11 20 55 PM](https://github.com/user-attachments/assets/6fd6982f-7247-4f14-84b2-2e600cb1fd49)
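
The percentage itself is simple arithmetic over the running total and the model's `context_window`; a minimal sketch, with illustrative names rather than the actual TUI code:

```rust
/// Percent of the context window still available after `used` tokens,
/// where `window` comes from openai_model_info.rs or the
/// `model_context_window` override. Multiply-before-divide is fine
/// here since realistic window sizes are nowhere near u64::MAX / 100.
fn percent_remaining(used: u64, window: u64) -> u64 {
    if window == 0 {
        return 0;
    }
    window.saturating_sub(used) * 100 / window
}
// e.g. percent_remaining(32_000, 128_000) == 75
```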

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.

Arguably, this already improves on the TypeScript CLI: rather than
reading the `usage` information directly, the TypeScript CLI uses
heuristics to estimate the number of tokens used:


https://github.com/openai/codex/blob/296996d74e/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16

Fixes https://github.com/openai/codex/issues/1242
Commit fcfe43c7df (parent 296996d74e) · Michael Bolin · 2025-06-25 · 14 changed files with 301 additions and 15 deletions

```diff
@@ -35,6 +35,7 @@ use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
 use crate::models::ResponseItem;
 use crate::openai_tools::create_tools_json_for_responses_api;
+use crate::protocol::TokenUsage;
 use crate::util::backoff;
 
 #[derive(Clone)]
@@ -210,6 +211,38 @@ struct SseEvent {
 #[derive(Debug, Deserialize)]
 struct ResponseCompleted {
     id: String,
+    usage: Option<ResponseCompletedUsage>,
 }
 
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedUsage {
+    input_tokens: u64,
+    input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
+    output_tokens: u64,
+    output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
+    total_tokens: u64,
+}
+
+impl From<ResponseCompletedUsage> for TokenUsage {
+    fn from(val: ResponseCompletedUsage) -> Self {
+        TokenUsage {
+            input_tokens: val.input_tokens,
+            cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens),
+            output_tokens: val.output_tokens,
+            reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens),
+            total_tokens: val.total_tokens,
+        }
+    }
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedInputTokensDetails {
+    cached_tokens: u64,
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedOutputTokensDetails {
+    reasoning_tokens: u64,
+}
+
 async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
@@ -221,7 +254,7 @@ where
     // If the stream stays completely silent for an extended period treat it as disconnected.
     let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
     // The response id returned from the "complete" message.
-    let mut response_id = None;
+    let mut response_completed: Option<ResponseCompleted> = None;
 
     loop {
         let sse = match timeout(idle_timeout, stream.next()).await {
@@ -233,9 +266,15 @@ where
                 return;
             }
             Ok(None) => {
-                match response_id {
-                    Some(response_id) => {
-                        let event = ResponseEvent::Completed { response_id };
+                match response_completed {
+                    Some(ResponseCompleted {
+                        id: response_id,
+                        usage,
+                    }) => {
+                        let event = ResponseEvent::Completed {
+                            response_id,
+                            token_usage: usage.map(Into::into),
+                        };
                         let _ = tx_event.send(Ok(event)).await;
                     }
                     None => {
@@ -301,7 +340,7 @@ where
                 if let Some(resp_val) = event.response {
                     match serde_json::from_value::<ResponseCompleted>(resp_val) {
                         Ok(r) => {
-                            response_id = Some(r.id);
+                            response_completed = Some(r);
                         }
                         Err(e) => {
                             debug!("failed to parse ResponseCompleted: {e}");
```