feat: show number of tokens remaining in UI (#1388)

When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.

When then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.

Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

![Screenshot 2025-06-25 at 11 20
55 PM](https://github.com/user-attachments/assets/6fd6982f-7247-4f14-84b2-2e600cb1fd49)

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.

Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:


296996d74e/codex-cli/src/utils/approximate-tokens-used.ts (L3-L16)

Fixes https://github.com/openai/codex/issues/1242
This commit is contained in:
Michael Bolin
2025-06-25 23:31:11 -07:00
committed by GitHub
parent 296996d74e
commit fcfe43c7df
14 changed files with 301 additions and 15 deletions

View File

@@ -215,6 +215,7 @@ where
let _ = tx_event
.send(Ok(ResponseEvent::Completed {
response_id: String::new(),
token_usage: None,
}))
.await;
return;
@@ -232,6 +233,7 @@ where
let _ = tx_event
.send(Ok(ResponseEvent::Completed {
response_id: String::new(),
token_usage: None,
}))
.await;
return;
@@ -317,6 +319,7 @@ where
let _ = tx_event
.send(Ok(ResponseEvent::Completed {
response_id: String::new(),
token_usage: None,
}))
.await;
@@ -394,7 +397,10 @@ where
// Not an assistant message forward immediately.
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
}
Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))) => {
Poll::Ready(Some(Ok(ResponseEvent::Completed {
response_id,
token_usage,
}))) => {
if !this.cumulative.is_empty() {
let aggregated_item = crate::models::ResponseItem::Message {
role: "assistant".to_string(),
@@ -404,7 +410,10 @@ where
};
// Buffer Completed so it is returned *after* the aggregated message.
this.pending_completed = Some(ResponseEvent::Completed { response_id });
this.pending_completed = Some(ResponseEvent::Completed {
response_id,
token_usage,
});
return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
aggregated_item,
@@ -412,7 +421,10 @@ where
}
// Nothing aggregated forward Completed directly.
return Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id })));
return Poll::Ready(Some(Ok(ResponseEvent::Completed {
response_id,
token_usage,
})));
} // No other `Ok` variants exist at the moment, continue polling.
}
}

View File

@@ -35,6 +35,7 @@ use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::WireApi;
use crate::models::ResponseItem;
use crate::openai_tools::create_tools_json_for_responses_api;
use crate::protocol::TokenUsage;
use crate::util::backoff;
#[derive(Clone)]
@@ -210,6 +211,38 @@ struct SseEvent {
#[derive(Debug, Deserialize)]
struct ResponseCompleted {
id: String,
usage: Option<ResponseCompletedUsage>,
}
#[derive(Debug, Deserialize)]
struct ResponseCompletedUsage {
input_tokens: u64,
input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
output_tokens: u64,
output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
total_tokens: u64,
}
impl From<ResponseCompletedUsage> for TokenUsage {
fn from(val: ResponseCompletedUsage) -> Self {
TokenUsage {
input_tokens: val.input_tokens,
cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens),
output_tokens: val.output_tokens,
reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens),
total_tokens: val.total_tokens,
}
}
}
#[derive(Debug, Deserialize)]
struct ResponseCompletedInputTokensDetails {
cached_tokens: u64,
}
#[derive(Debug, Deserialize)]
struct ResponseCompletedOutputTokensDetails {
reasoning_tokens: u64,
}
async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
@@ -221,7 +254,7 @@ where
// If the stream stays completely silent for an extended period treat it as disconnected.
let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
// The response id returned from the "complete" message.
let mut response_id = None;
let mut response_completed: Option<ResponseCompleted> = None;
loop {
let sse = match timeout(idle_timeout, stream.next()).await {
@@ -233,9 +266,15 @@ where
return;
}
Ok(None) => {
match response_id {
Some(response_id) => {
let event = ResponseEvent::Completed { response_id };
match response_completed {
Some(ResponseCompleted {
id: response_id,
usage,
}) => {
let event = ResponseEvent::Completed {
response_id,
token_usage: usage.map(Into::into),
};
let _ = tx_event.send(Ok(event)).await;
}
None => {
@@ -301,7 +340,7 @@ where
if let Some(resp_val) = event.response {
match serde_json::from_value::<ResponseCompleted>(resp_val) {
Ok(r) => {
response_id = Some(r.id);
response_completed = Some(r);
}
Err(e) => {
debug!("failed to parse ResponseCompleted: {e}");

View File

@@ -2,6 +2,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
use crate::error::Result;
use crate::models::ResponseItem;
use crate::protocol::TokenUsage;
use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
use futures::Stream;
use serde::Serialize;
@@ -51,7 +52,10 @@ impl Prompt {
#[derive(Debug)]
pub enum ResponseEvent {
OutputItemDone(ResponseItem),
Completed { response_id: String },
Completed {
response_id: String,
token_usage: Option<TokenUsage>,
},
}
#[derive(Debug, Serialize)]

View File

@@ -1078,7 +1078,20 @@ async fn try_run_turn(
let response = handle_response_item(sess, sub_id, item.clone()).await?;
output.push(ProcessedResponseItem { item, response });
}
ResponseEvent::Completed { response_id } => {
ResponseEvent::Completed {
response_id,
token_usage,
} => {
if let Some(token_usage) = token_usage {
sess.tx_event
.send(Event {
id: sub_id.to_string(),
msg: EventMsg::TokenCount(token_usage),
})
.await
.ok();
}
let mut state = sess.state.lock().unwrap();
state.previous_response_id = Some(response_id);
break;

View File

@@ -10,6 +10,7 @@ use crate::config_types::UriBasedFileOpener;
use crate::flags::OPENAI_DEFAULT_MODEL;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::built_in_model_providers;
use crate::openai_model_info::get_model_info;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPolicy;
use dirs::home_dir;
@@ -30,6 +31,12 @@ pub struct Config {
/// Optional override of model selection.
pub model: String,
/// Size of the context window for the model, in tokens.
pub model_context_window: Option<u64>,
/// Maximum number of output tokens.
pub model_max_output_tokens: Option<u64>,
/// Key into the model_providers map that specifies which provider to use.
pub model_provider_id: String,
@@ -234,6 +241,12 @@ pub struct ConfigToml {
/// Provider to use from the model_providers map.
pub model_provider: Option<String>,
/// Size of the context window for the model, in tokens.
pub model_context_window: Option<u64>,
/// Maximum number of output tokens.
pub model_max_output_tokens: Option<u64>,
/// Default approval policy for executing commands.
pub approval_policy: Option<AskForApproval>,
@@ -387,11 +400,23 @@ impl Config {
let history = cfg.history.unwrap_or_default();
let model = model
.or(config_profile.model)
.or(cfg.model)
.unwrap_or_else(default_model);
let openai_model_info = get_model_info(&model);
let model_context_window = cfg
.model_context_window
.or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
openai_model_info
.as_ref()
.map(|info| info.max_output_tokens)
});
let config = Self {
model: model
.or(config_profile.model)
.or(cfg.model)
.unwrap_or_else(default_model),
model,
model_context_window,
model_max_output_tokens,
model_provider_id,
model_provider,
cwd: resolved_cwd,
@@ -687,6 +712,8 @@ disable_response_storage = true
assert_eq!(
Config {
model: "o3".to_string(),
model_context_window: Some(200_000),
model_max_output_tokens: Some(100_000),
model_provider_id: "openai".to_string(),
model_provider: fixture.openai_provider.clone(),
approval_policy: AskForApproval::Never,
@@ -729,6 +756,8 @@ disable_response_storage = true
)?;
let expected_gpt3_profile_config = Config {
model: "gpt-3.5-turbo".to_string(),
model_context_window: Some(16_385),
model_max_output_tokens: Some(4_096),
model_provider_id: "openai-chat-completions".to_string(),
model_provider: fixture.openai_chat_completions_provider.clone(),
approval_policy: AskForApproval::UnlessTrusted,
@@ -786,6 +815,8 @@ disable_response_storage = true
)?;
let expected_zdr_profile_config = Config {
model: "o3".to_string(),
model_context_window: Some(200_000),
model_max_output_tokens: Some(100_000),
model_provider_id: "openai".to_string(),
model_provider: fixture.openai_provider.clone(),
approval_policy: AskForApproval::OnFailure,

View File

@@ -28,6 +28,7 @@ pub use model_provider_info::ModelProviderInfo;
pub use model_provider_info::WireApi;
mod models;
pub mod openai_api_key;
mod openai_model_info;
mod openai_tools;
mod project_doc;
pub mod protocol;

View File

@@ -0,0 +1,71 @@
/// Metadata about a model, particularly OpenAI models.
/// We may want to consider including details like the pricing for
/// input tokens, output tokens, etc., though users will need to be able to
/// override this in config.toml, as this information can get out of date.
/// Though this would help present more accurate pricing information in the UI.
#[derive(Debug)]
pub(crate) struct ModelInfo {
/// Size of the context window in tokens.
pub(crate) context_window: u64,
/// Maximum number of output tokens that can be generated for the model.
pub(crate) max_output_tokens: u64,
}
/// Note details such as what a model like gpt-4o is aliased to may be out of
/// date.
pub(crate) fn get_model_info(name: &str) -> Option<ModelInfo> {
match name {
// https://platform.openai.com/docs/models/o3
"o3" => Some(ModelInfo {
context_window: 200_000,
max_output_tokens: 100_000,
}),
// https://platform.openai.com/docs/models/o4-mini
"o4-mini" => Some(ModelInfo {
context_window: 200_000,
max_output_tokens: 100_000,
}),
// https://platform.openai.com/docs/models/codex-mini-latest
"codex-mini-latest" => Some(ModelInfo {
context_window: 200_000,
max_output_tokens: 100_000,
}),
// As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
// https://platform.openai.com/docs/models/gpt-4.1
"gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
context_window: 1_047_576,
max_output_tokens: 32_768,
}),
// As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
// https://platform.openai.com/docs/models/gpt-4o
"gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
context_window: 128_000,
max_output_tokens: 16_384,
}),
// https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
"gpt-4o-2024-05-13" => Some(ModelInfo {
context_window: 128_000,
max_output_tokens: 4_096,
}),
// https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
"gpt-4o-2024-11-20" => Some(ModelInfo {
context_window: 128_000,
max_output_tokens: 16_384,
}),
// https://platform.openai.com/docs/models/gpt-3.5-turbo
"gpt-3.5-turbo" => Some(ModelInfo {
context_window: 16_385,
max_output_tokens: 4_096,
}),
_ => None,
}
}

View File

@@ -275,6 +275,10 @@ pub enum EventMsg {
/// Agent has completed all actions
TaskComplete(TaskCompleteEvent),
/// Token count event, sent periodically to report the number of tokens
/// used in the current session.
TokenCount(TokenUsage),
/// Agent text output message
AgentMessage(AgentMessageEvent),
@@ -322,6 +326,15 @@ pub struct TaskCompleteEvent {
pub last_agent_message: Option<String>,
}
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
pub struct TokenUsage {
pub input_tokens: u64,
pub cached_input_tokens: Option<u64>,
pub output_tokens: u64,
pub reasoning_output_tokens: Option<u64>,
pub total_tokens: u64,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AgentMessageEvent {
pub message: String,