use crate::model_family::ModelFamily;

/// Metadata about a model, particularly OpenAI models.
///
/// We may want to include details such as pricing for input tokens, output
/// tokens, etc., which would help present more accurate pricing information
/// in the UI. Users would need to be able to override those values in
/// config.toml, though, as this information can get out of date.
#[derive(Debug)]
pub(crate) struct ModelInfo {
    /// Size of the context window in tokens.
    pub(crate) context_window: u64,

    /// Maximum number of output tokens that can be generated for the model.
    pub(crate) max_output_tokens: u64,
}
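
// One way a caller might combine this table with user configuration (an
// illustrative sketch; the `config` binding and its field names are assumed,
// not necessarily this crate's actual types): when the model is not
// recognized below, explicit `model_context_window` / `model_max_output_tokens`
// values from config.toml can supply the limits instead.
//
//     let context_window = get_model_info(&model_family)
//         .map(|info| info.context_window)
//         .or(config.model_context_window);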

pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
    let slug = model_family.slug.as_str();
    match slug {
        // OSS models have a 128k shared token pool.
        // Arbitrarily splitting it: 3/4 input context, 1/4 output.
        // https://openai.com/index/gpt-oss-model-card/
        "gpt-oss-20b" => Some(ModelInfo {
            context_window: 96_000,
            max_output_tokens: 32_000,
        }),
        "gpt-oss-120b" => Some(ModelInfo {
            context_window: 96_000,
            max_output_tokens: 32_000,
        }),

        // https://platform.openai.com/docs/models/o3
        "o3" => Some(ModelInfo {
            context_window: 200_000,
            max_output_tokens: 100_000,
        }),

        // https://platform.openai.com/docs/models/o4-mini
        "o4-mini" => Some(ModelInfo {
            context_window: 200_000,
            max_output_tokens: 100_000,
        }),

        // https://platform.openai.com/docs/models/codex-mini-latest
        "codex-mini-latest" => Some(ModelInfo {
            context_window: 200_000,
            max_output_tokens: 100_000,
        }),

        // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
        // https://platform.openai.com/docs/models/gpt-4.1
        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
            context_window: 1_047_576,
            max_output_tokens: 32_768,
        }),

        // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
        // https://platform.openai.com/docs/models/gpt-4o
        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
            context_window: 128_000,
            max_output_tokens: 16_384,
        }),

        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
        "gpt-4o-2024-05-13" => Some(ModelInfo {
            context_window: 128_000,
            max_output_tokens: 4_096,
        }),

        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
        "gpt-4o-2024-11-20" => Some(ModelInfo {
            context_window: 128_000,
            max_output_tokens: 16_384,
        }),

        // https://platform.openai.com/docs/models/gpt-3.5-turbo
        "gpt-3.5-turbo" => Some(ModelInfo {
            context_window: 16_385,
            max_output_tokens: 4_096,
        }),

        "gpt-5" => Some(ModelInfo {
            context_window: 272_000,
            max_output_tokens: 128_000,
        }),

        _ if slug.starts_with("codex-") => Some(ModelInfo {
            context_window: 272_000,
            max_output_tokens: 128_000,
        }),

        _ => None,
    }
}
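
// A minimal usage sketch. It assumes the companion `model_family` module
// exposes a `find_family_for_model(&str) -> Option<ModelFamily>` helper; if
// the real constructor differs, adjust accordingly. The tests only show how
// callers are expected to consume `get_model_info`, with `None` signalling
// that config.toml values should be used instead.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model_family::find_family_for_model;

    #[test]
    fn known_model_reports_static_limits() {
        // "o3" is one of the slugs in the static table above.
        let family = find_family_for_model("o3").expect("o3 should map to a model family");
        let info = get_model_info(&family).expect("o3 should have static model info");
        assert_eq!(info.context_window, 200_000);
        assert_eq!(info.max_output_tokens, 100_000);
    }

    #[test]
    fn unknown_model_returns_none() {
        // Unrecognized slugs yield `None`, leaving callers to fall back to
        // `model_context_window` / `model_max_output_tokens` in config.toml.
        if let Some(family) = find_family_for_model("not-a-real-model") {
            assert!(get_model_info(&family).is_none());
        }
    }
}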