diff --git a/codex-rs/config.md b/codex-rs/config.md
index 14d5fd22..bb8b6716 100644
--- a/codex-rs/config.md
+++ b/codex-rs/config.md
@@ -407,6 +407,16 @@ Setting `hide_agent_reasoning` to `true` suppresses these events in **both** the
 hide_agent_reasoning = true # defaults to false
 ```
 
+## model_context_window
+
+The size of the context window for the model, in tokens.
+
+In general, Codex knows the context window for the most common OpenAI models, but if you are using a new model with an older version of the Codex CLI, you can use `model_context_window` to tell Codex what value to use to determine how much context is left during a conversation.
+
+## model_max_output_tokens
+
+This is analogous to `model_context_window`, but for the maximum number of output tokens for the model.
+
 ## project_doc_max_bytes
 
 Maximum number of bytes to read from an `AGENTS.md` file to include in the instructions sent with the first turn of a session. Defaults to 32 KiB.
diff --git a/codex-rs/core/src/chat_completions.rs b/codex-rs/core/src/chat_completions.rs
index f381c72e..12c5b7af 100644
--- a/codex-rs/core/src/chat_completions.rs
+++ b/codex-rs/core/src/chat_completions.rs
@@ -215,6 +215,7 @@ where
                 let _ = tx_event
                     .send(Ok(ResponseEvent::Completed {
                         response_id: String::new(),
+                        token_usage: None,
                     }))
                     .await;
                 return;
@@ -232,6 +233,7 @@ where
                 let _ = tx_event
                     .send(Ok(ResponseEvent::Completed {
                         response_id: String::new(),
+                        token_usage: None,
                     }))
                     .await;
                 return;
@@ -317,6 +319,7 @@ where
     let _ = tx_event
         .send(Ok(ResponseEvent::Completed {
             response_id: String::new(),
+            token_usage: None,
         }))
        .await;

@@ -394,7 +397,10 @@
                     // Not an assistant message – forward immediately.
                     return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(item))));
                 }
-                Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id }))) => {
+                Poll::Ready(Some(Ok(ResponseEvent::Completed {
+                    response_id,
+                    token_usage,
+                }))) => {
                     if !this.cumulative.is_empty() {
                         let aggregated_item = crate::models::ResponseItem::Message {
                             role: "assistant".to_string(),
@@ -404,7 +410,10 @@
                         };

                         // Buffer Completed so it is returned *after* the aggregated message.
-                        this.pending_completed = Some(ResponseEvent::Completed { response_id });
+                        this.pending_completed = Some(ResponseEvent::Completed {
+                            response_id,
+                            token_usage,
+                        });

                         return Poll::Ready(Some(Ok(ResponseEvent::OutputItemDone(
                             aggregated_item,
@@ -412,7 +421,10 @@
                     }
                     // Nothing aggregated – forward Completed directly.
-                    return Poll::Ready(Some(Ok(ResponseEvent::Completed { response_id })));
+                    return Poll::Ready(Some(Ok(ResponseEvent::Completed {
+                        response_id,
+                        token_usage,
+                    })));
                 }
                 // No other `Ok` variants exist at the moment, continue polling.
                 }
             }
diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs
index aff83888..4770796d 100644
--- a/codex-rs/core/src/client.rs
+++ b/codex-rs/core/src/client.rs
@@ -35,6 +35,7 @@ use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::WireApi;
 use crate::models::ResponseItem;
 use crate::openai_tools::create_tools_json_for_responses_api;
+use crate::protocol::TokenUsage;
 use crate::util::backoff;

 #[derive(Clone)]
@@ -210,6 +211,38 @@ struct SseEvent {
 #[derive(Debug, Deserialize)]
 struct ResponseCompleted {
     id: String,
+    usage: Option<ResponseCompletedUsage>,
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedUsage {
+    input_tokens: u64,
+    input_tokens_details: Option<ResponseCompletedInputTokensDetails>,
+    output_tokens: u64,
+    output_tokens_details: Option<ResponseCompletedOutputTokensDetails>,
+    total_tokens: u64,
+}
+
+impl From<ResponseCompletedUsage> for TokenUsage {
+    fn from(val: ResponseCompletedUsage) -> Self {
+        TokenUsage {
+            input_tokens: val.input_tokens,
+            cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens),
+            output_tokens: val.output_tokens,
+            reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens),
+            total_tokens: val.total_tokens,
+        }
+    }
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedInputTokensDetails {
+    cached_tokens: u64,
+}
+
+#[derive(Debug, Deserialize)]
+struct ResponseCompletedOutputTokensDetails {
+    reasoning_tokens: u64,
 }

 async fn process_sse<S>(stream: S, tx_event: mpsc::Sender<Result<ResponseEvent>>)
@@ -221,7 +254,7 @@ where
     // If the stream stays completely silent for an extended period treat it as disconnected.
     let idle_timeout = *OPENAI_STREAM_IDLE_TIMEOUT_MS;
     // The response id returned from the "complete" message.
-    let mut response_id = None;
+    let mut response_completed: Option<ResponseCompleted> = None;

     loop {
         let sse = match timeout(idle_timeout, stream.next()).await {
@@ -233,9 +266,15 @@ where
                 return;
             }
             Ok(None) => {
-                match response_id {
-                    Some(response_id) => {
-                        let event = ResponseEvent::Completed { response_id };
+                match response_completed {
+                    Some(ResponseCompleted {
+                        id: response_id,
+                        usage,
+                    }) => {
+                        let event = ResponseEvent::Completed {
+                            response_id,
+                            token_usage: usage.map(Into::into),
+                        };
                         let _ = tx_event.send(Ok(event)).await;
                     }
                     None => {
@@ -301,7 +340,7 @@ where
             if let Some(resp_val) = event.response {
                 match serde_json::from_value::<ResponseCompleted>(resp_val) {
                     Ok(r) => {
-                        response_id = Some(r.id);
+                        response_completed = Some(r);
                     }
                     Err(e) => {
                         debug!("failed to parse ResponseCompleted: {e}");
diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs
index a2633475..e17cf22c 100644
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -2,6 +2,7 @@ use crate::config_types::ReasoningEffort as ReasoningEffortConfig;
 use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use crate::error::Result;
 use crate::models::ResponseItem;
+use crate::protocol::TokenUsage;
 use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
 use futures::Stream;
 use serde::Serialize;
@@ -51,7 +52,10 @@ impl Prompt {
 #[derive(Debug)]
 pub enum ResponseEvent {
     OutputItemDone(ResponseItem),
-    Completed { response_id: String },
+    Completed {
+        response_id: String,
+        token_usage: Option<TokenUsage>,
+    },
 }

 #[derive(Debug, Serialize)]
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index e12a3a60..a43f75a7 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1078,7 +1078,20 @@ async fn try_run_turn(
                 let response = handle_response_item(sess, sub_id, item.clone()).await?;
                 output.push(ProcessedResponseItem { item, response });
             }
-            ResponseEvent::Completed { response_id } => {
+            ResponseEvent::Completed {
+                response_id,
+                token_usage,
+            } => {
+                if let Some(token_usage) = token_usage {
+                    sess.tx_event
+                        .send(Event {
+                            id: sub_id.to_string(),
+                            msg: EventMsg::TokenCount(token_usage),
+                        })
+                        .await
+                        .ok();
+                }
+
                 let mut state = sess.state.lock().unwrap();
                 state.previous_response_id = Some(response_id);
                 break;
diff --git a/codex-rs/core/src/config.rs b/codex-rs/core/src/config.rs
index e01bb3f4..6652d7c7 100644
--- a/codex-rs/core/src/config.rs
+++ b/codex-rs/core/src/config.rs
@@ -10,6 +10,7 @@ use crate::config_types::UriBasedFileOpener;
 use crate::flags::OPENAI_DEFAULT_MODEL;
 use crate::model_provider_info::ModelProviderInfo;
 use crate::model_provider_info::built_in_model_providers;
+use crate::openai_model_info::get_model_info;
 use crate::protocol::AskForApproval;
 use crate::protocol::SandboxPolicy;
 use dirs::home_dir;
@@ -30,6 +31,12 @@ pub struct Config {
     /// Optional override of model selection.
     pub model: String,

+    /// Size of the context window for the model, in tokens.
+    pub model_context_window: Option<u64>,
+
+    /// Maximum number of output tokens.
+    pub model_max_output_tokens: Option<u64>,
+
     /// Key into the model_providers map that specifies which provider to use.
     pub model_provider_id: String,

@@ -234,6 +241,12 @@ pub struct ConfigToml {
     /// Provider to use from the model_providers map.
     pub model_provider: Option<String>,

+    /// Size of the context window for the model, in tokens.
+    pub model_context_window: Option<u64>,
+
+    /// Maximum number of output tokens.
+    pub model_max_output_tokens: Option<u64>,
+
     /// Default approval policy for executing commands.
     pub approval_policy: Option<AskForApproval>,

@@ -387,11 +400,23 @@
         let history = cfg.history.unwrap_or_default();

+        let model = model
+            .or(config_profile.model)
+            .or(cfg.model)
+            .unwrap_or_else(default_model);
+        let openai_model_info = get_model_info(&model);
+        let model_context_window = cfg
+            .model_context_window
+            .or_else(|| openai_model_info.as_ref().map(|info| info.context_window));
+        let model_max_output_tokens = cfg.model_max_output_tokens.or_else(|| {
+            openai_model_info
+                .as_ref()
+                .map(|info| info.max_output_tokens)
+        });
         let config = Self {
-            model: model
-                .or(config_profile.model)
-                .or(cfg.model)
-                .unwrap_or_else(default_model),
+            model,
+            model_context_window,
+            model_max_output_tokens,
             model_provider_id,
             model_provider,
             cwd: resolved_cwd,
@@ -687,6 +712,8 @@ disable_response_storage = true
         assert_eq!(
             Config {
                 model: "o3".to_string(),
+                model_context_window: Some(200_000),
+                model_max_output_tokens: Some(100_000),
                 model_provider_id: "openai".to_string(),
                 model_provider: fixture.openai_provider.clone(),
                 approval_policy: AskForApproval::Never,
@@ -729,6 +756,8 @@ disable_response_storage = true
         )?;
         let expected_gpt3_profile_config = Config {
             model: "gpt-3.5-turbo".to_string(),
+            model_context_window: Some(16_385),
+            model_max_output_tokens: Some(4_096),
             model_provider_id: "openai-chat-completions".to_string(),
             model_provider: fixture.openai_chat_completions_provider.clone(),
             approval_policy: AskForApproval::UnlessTrusted,
@@ -786,6 +815,8 @@ disable_response_storage = true
         )?;
         let expected_zdr_profile_config = Config {
             model: "o3".to_string(),
+            model_context_window: Some(200_000),
+            model_max_output_tokens: Some(100_000),
             model_provider_id: "openai".to_string(),
             model_provider: fixture.openai_provider.clone(),
             approval_policy: AskForApproval::OnFailure,
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 16cf1905..6812260c 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -28,6 +28,7 @@ pub use model_provider_info::ModelProviderInfo;
 pub use model_provider_info::WireApi;
 mod models;
 pub mod openai_api_key;
+mod openai_model_info;
 mod openai_tools;
 mod project_doc;
 pub mod protocol;
diff --git a/codex-rs/core/src/openai_model_info.rs b/codex-rs/core/src/openai_model_info.rs
new file mode 100644
index 00000000..9ffd831a
--- /dev/null
+++ b/codex-rs/core/src/openai_model_info.rs
@@ -0,0 +1,71 @@
+/// Metadata about a model, particularly OpenAI models.
+/// We may eventually want to include details such as per-token pricing,
+/// which would let the UI present more accurate cost information, though
+/// users will need to be able to override those values in config.toml
+/// because they can get out of date.
+#[derive(Debug)]
+pub(crate) struct ModelInfo {
+    /// Size of the context window in tokens.
+    pub(crate) context_window: u64,
+
+    /// Maximum number of output tokens that can be generated for the model.
+    pub(crate) max_output_tokens: u64,
+}
+
+/// Note that details such as what a model like gpt-4o is aliased to may be
+/// out of date.
+pub(crate) fn get_model_info(name: &str) -> Option<ModelInfo> {
+    match name {
+        // https://platform.openai.com/docs/models/o3
+        "o3" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // https://platform.openai.com/docs/models/o4-mini
+        "o4-mini" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // https://platform.openai.com/docs/models/codex-mini-latest
+        "codex-mini-latest" => Some(ModelInfo {
+            context_window: 200_000,
+            max_output_tokens: 100_000,
+        }),
+
+        // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
+        // https://platform.openai.com/docs/models/gpt-4.1
+        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
+            context_window: 1_047_576,
+            max_output_tokens: 32_768,
+        }),
+
+        // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
+        // https://platform.openai.com/docs/models/gpt-4o
+        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 16_384,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
+        "gpt-4o-2024-05-13" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 4_096,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
+        "gpt-4o-2024-11-20" => Some(ModelInfo {
+            context_window: 128_000,
+            max_output_tokens: 16_384,
+        }),
+
+        // https://platform.openai.com/docs/models/gpt-3.5-turbo
+        "gpt-3.5-turbo" => Some(ModelInfo {
+            context_window: 16_385,
+            max_output_tokens: 4_096,
+        }),
+
+        _ => None,
+    }
+}
diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
index d4aa7698..fa25a2fe 100644
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -275,6 +275,10 @@ pub enum EventMsg {
     /// Agent has completed all actions
     TaskComplete(TaskCompleteEvent),

+    /// Token count event, sent periodically to report the number of tokens
+    /// used in the current session.
+    TokenCount(TokenUsage),
+
     /// Agent text output message
     AgentMessage(AgentMessageEvent),

@@ -322,6 +326,15 @@ pub struct TaskCompleteEvent {
     pub last_agent_message: Option<String>,
 }

+#[derive(Debug, Clone, Deserialize, Serialize, Default)]
+pub struct TokenUsage {
+    pub input_tokens: u64,
+    pub cached_input_tokens: Option<u64>,
+    pub output_tokens: u64,
+    pub reasoning_output_tokens: Option<u64>,
+    pub total_tokens: u64,
+}
+
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct AgentMessageEvent {
     pub message: String,
diff --git a/codex-rs/exec/src/event_processor.rs b/codex-rs/exec/src/event_processor.rs
index e2a8bbb2..5320c572 100644
--- a/codex-rs/exec/src/event_processor.rs
+++ b/codex-rs/exec/src/event_processor.rs
@@ -16,6 +16,7 @@ use codex_core::protocol::McpToolCallEndEvent;
 use codex_core::protocol::PatchApplyBeginEvent;
 use codex_core::protocol::PatchApplyEndEvent;
 use codex_core::protocol::SessionConfiguredEvent;
+use codex_core::protocol::TokenUsage;
 use owo_colors::OwoColorize;
 use owo_colors::Style;
 use shlex::try_join;
@@ -180,6 +181,9 @@ impl EventProcessor {
             EventMsg::TaskStarted | EventMsg::TaskComplete(_) => {
                 // Ignore.
             }
+            EventMsg::TokenCount(TokenUsage { total_tokens, .. }) => {
+                ts_println!(self, "tokens used: {total_tokens}");
+            }
             EventMsg::AgentMessage(AgentMessageEvent { message }) => {
                 ts_println!(
                     self,
diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs
index 67c990b0..796a119e 100644
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -162,6 +162,7 @@ pub async fn run_codex_tool_session(
             }
             EventMsg::Error(_)
             | EventMsg::TaskStarted
+            | EventMsg::TokenCount(_)
             | EventMsg::AgentReasoning(_)
             | EventMsg::McpToolCallBegin(_)
             | EventMsg::McpToolCallEnd(_)
diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs
index 1218f76e..4ec82990 100644
--- a/codex-rs/tui/src/bottom_pane/chat_composer.rs
+++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs
@@ -1,3 +1,4 @@
+use codex_core::protocol::TokenUsage;
 use crossterm::event::KeyEvent;
 use ratatui::buffer::Buffer;
 use ratatui::layout::Alignment;
@@ -24,6 +25,8 @@ const MIN_TEXTAREA_ROWS: usize = 1;
 /// Rows consumed by the border.
 const BORDER_LINES: u16 = 2;

+const BASE_PLACEHOLDER_TEXT: &str = "send a message";
+
 /// Result returned when the user interacts with the text area.
 pub enum InputResult {
     Submitted(String),
@@ -40,7 +43,7 @@ pub(crate) struct ChatComposer<'a> {
 impl ChatComposer<'_> {
     pub fn new(has_input_focus: bool, app_event_tx: AppEventSender) -> Self {
         let mut textarea = TextArea::default();
-        textarea.set_placeholder_text("send a message");
+        textarea.set_placeholder_text(BASE_PLACEHOLDER_TEXT);
         textarea.set_cursor_line_style(ratatui::style::Style::default());

         let mut this = Self {
@@ -53,6 +56,41 @@ impl ChatComposer<'_> {
         this
     }

+    /// Update the cached *context-left* percentage and refresh the placeholder
+    /// text. The UI relies on the placeholder to convey the remaining
+    /// context when the composer is empty.
+    pub(crate) fn set_token_usage(
+        &mut self,
+        token_usage: TokenUsage,
+        model_context_window: Option<u64>,
+    ) {
+        let placeholder = match (token_usage.total_tokens, model_context_window) {
+            (total_tokens, Some(context_window)) => {
+                let percent_remaining: u8 = if context_window > 0 {
+                    // Calculate the percentage of context left.
+                    let percent = 100.0 - (total_tokens as f32 / context_window as f32 * 100.0);
+                    percent.clamp(0.0, 100.0) as u8
+                } else {
+                    // If we don't have a context window, we cannot compute the
+                    // percentage.
+                    100
+                };
+                if percent_remaining > 25 {
+                    format!("{BASE_PLACEHOLDER_TEXT} — {percent_remaining}% context left")
+                } else {
+                    format!(
+                        "{BASE_PLACEHOLDER_TEXT} — {percent_remaining}% context left (consider /compact)"
+                    )
+                }
+            }
+            (total_tokens, None) => {
+                format!("{BASE_PLACEHOLDER_TEXT} — {total_tokens} tokens used")
+            }
+        };
+
+        self.textarea.set_placeholder_text(placeholder);
+    }
+
     /// Record the history metadata advertised by `SessionConfiguredEvent` so
     /// that the composer can navigate cross-session history.
     pub(crate) fn set_history_metadata(&mut self, log_id: u64, entry_count: usize) {
diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs
index c654581c..e3234e99 100644
--- a/codex-rs/tui/src/bottom_pane/mod.rs
+++ b/codex-rs/tui/src/bottom_pane/mod.rs
@@ -2,6 +2,7 @@
 
 use bottom_pane_view::BottomPaneView;
 use bottom_pane_view::ConditionalUpdate;
+use codex_core::protocol::TokenUsage;
 use crossterm::event::KeyEvent;
 use ratatui::buffer::Buffer;
 use ratatui::layout::Rect;
@@ -129,6 +130,18 @@ impl BottomPane<'_> {
         }
     }

+    /// Update the *context-window remaining* indicator in the composer. This
+    /// is forwarded directly to the underlying `ChatComposer`.
+    pub(crate) fn set_token_usage(
+        &mut self,
+        token_usage: TokenUsage,
+        model_context_window: Option<u64>,
+    ) {
+        self.composer
+            .set_token_usage(token_usage, model_context_window);
+        self.request_redraw();
+    }
+
     /// Called when the agent requests user approval.
     pub fn push_approval_request(&mut self, request: ApprovalRequest) {
         let request = if let Some(view) = self.active_view.as_mut() {
diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs
index bd5197c7..fad72e3a 100644
--- a/codex-rs/tui/src/chatwidget.rs
+++ b/codex-rs/tui/src/chatwidget.rs
@@ -18,6 +18,7 @@ use codex_core::protocol::McpToolCallEndEvent;
 use codex_core::protocol::Op;
 use codex_core::protocol::PatchApplyBeginEvent;
 use codex_core::protocol::TaskCompleteEvent;
+use codex_core::protocol::TokenUsage;
 use crossterm::event::KeyEvent;
 use ratatui::buffer::Buffer;
 use ratatui::layout::Constraint;
@@ -46,6 +47,7 @@ pub(crate) struct ChatWidget<'a> {
     input_focus: InputFocus,
     config: Config,
     initial_user_message: Option,
+    token_usage: TokenUsage,
 }

 #[derive(Clone, Copy, Eq, PartialEq)]
@@ -131,6 +133,7 @@ impl ChatWidget<'_> {
                 initial_prompt.unwrap_or_default(),
                 initial_images,
             ),
+            token_usage: TokenUsage::default(),
         }
     }

@@ -250,6 +253,11 @@ impl ChatWidget<'_> {
                 self.bottom_pane.set_task_running(false);
                 self.request_redraw();
             }
+            EventMsg::TokenCount(token_usage) => {
+                self.token_usage = add_token_usage(&self.token_usage, &token_usage);
+                self.bottom_pane
+                    .set_token_usage(self.token_usage.clone(), self.config.model_context_window);
+            }
             EventMsg::Error(ErrorEvent { message }) => {
                 self.conversation_history.add_error(message);
                 self.bottom_pane.set_task_running(false);
@@ -410,3 +418,31 @@ impl WidgetRef for &ChatWidget<'_> {
         (&self.bottom_pane).render(chunks[1], buf);
     }
 }
+
+fn add_token_usage(current_usage: &TokenUsage, new_usage: &TokenUsage) -> TokenUsage {
+    let cached_input_tokens = match (
+        current_usage.cached_input_tokens,
+        new_usage.cached_input_tokens,
+    ) {
+        (Some(current), Some(new)) => Some(current + new),
+        (Some(current), None) => Some(current),
+        (None, Some(new)) => Some(new),
+        (None, None) => None,
+    };
+    let reasoning_output_tokens = match (
+        current_usage.reasoning_output_tokens,
+        new_usage.reasoning_output_tokens,
+    ) {
+        (Some(current), Some(new)) => Some(current + new),
+        (Some(current), None) => Some(current),
+        (None, Some(new)) => Some(new),
+        (None, None) => None,
+    };
+    TokenUsage {
+        input_tokens: current_usage.input_tokens + new_usage.input_tokens,
+        cached_input_tokens,
+        output_tokens: current_usage.output_tokens + new_usage.output_tokens,
+        reasoning_output_tokens,
+        total_tokens: current_usage.total_tokens + new_usage.total_tokens,
+    }
+}
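For quick reference, the two options documented in the `config.md` change above are set in `config.toml` as shown below. This is only an illustrative sketch: the numbers are the `o3` entry from the new `openai_model_info.rs` table, and overriding them by hand is only needed when Codex does not already know the model in use.

```toml
# Illustrative config.toml override (values taken from the o3 entry in this diff).
model = "o3"
model_context_window = 200_000    # tokens available to the conversation
model_max_output_tokens = 100_000 # upper bound on tokens generated per response
```

With a context window available, the TUI composer placeholder reports the percentage of context left and suggests `/compact` once that drops to 25% or below; without one, it falls back to showing the raw total of tokens used.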