llmx/codex-rs/core/src/openai_model_info.rs
pakrym-oai 7df9e9c664 Correctly calculate remaining context size (#3190)
We had multiple issues with the context-size calculation:
1. Deriving `initial_prompt_tokens` from the cache size is not reliable;
a cache miss can set it to a much higher value. For now it is hardcoded
to a safer constant.
2. The input context size for GPT-5 is 272k (that's where the 33% came from).

Fixes.
2025-09-04 23:34:14 +00:00
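
The stray 33% is easy to reproduce. A minimal sketch, assuming the previous
calculation treated GPT-5's combined input + output budget as the context
window (272k input + 128k output = 400k, matching the constants in the file
below); this is illustrative arithmetic, not the crate's actual code:

fn main() {
    let total_budget: u64 = 272_000 + 128_000; // 400k combined budget (assumed old window)
    let input_window: u64 = 272_000; // corrected GPT-5 input context size
    // Measured against the wrong 400k window, even a maxed-out prompt
    // appears to keep (400k - 272k) / 400k = 32% of its context free.
    let shown = 100.0 * (total_budget - input_window) as f64 / total_budget as f64;
    println!("remaining shown at a full prompt: {shown:.0}%"); // prints 32%
}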

use crate::model_family::ModelFamily;

/// Metadata about a model, particularly OpenAI models.
///
/// We may want to consider including details like the pricing for input
/// tokens, output tokens, etc., though users will need to be able to
/// override this in config.toml, as this information can get out of date.
/// That said, it would help present more accurate pricing information in
/// the UI.
#[derive(Debug)]
pub(crate) struct ModelInfo {
    /// Size of the context window in tokens.
    pub(crate) context_window: u64,

    /// Maximum number of output tokens that can be generated for the model.
    pub(crate) max_output_tokens: u64,
}

pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
    let slug = model_family.slug.as_str();
    match slug {
        // OSS models have a 128k shared token pool.
        // Arbitrarily splitting it: 3/4 input context, 1/4 output.
        // https://openai.com/index/gpt-oss-model-card/
        "gpt-oss-20b" => Some(ModelInfo {
            context_window: 96_000,
            max_output_tokens: 32_000,
        }),
        "gpt-oss-120b" => Some(ModelInfo {
            context_window: 96_000,
            max_output_tokens: 32_000,
        }),

        // https://platform.openai.com/docs/models/o3
        "o3" => Some(ModelInfo {
            context_window: 200_000,
            max_output_tokens: 100_000,
        }),

        // https://platform.openai.com/docs/models/o4-mini
        "o4-mini" => Some(ModelInfo {
            context_window: 200_000,
            max_output_tokens: 100_000,
        }),

        // https://platform.openai.com/docs/models/codex-mini-latest
        "codex-mini-latest" => Some(ModelInfo {
            context_window: 200_000,
            max_output_tokens: 100_000,
        }),

        // As of Jun 25, 2025, gpt-4.1 defaults to gpt-4.1-2025-04-14.
        // https://platform.openai.com/docs/models/gpt-4.1
        "gpt-4.1" | "gpt-4.1-2025-04-14" => Some(ModelInfo {
            context_window: 1_047_576,
            max_output_tokens: 32_768,
        }),

        // As of Jun 25, 2025, gpt-4o defaults to gpt-4o-2024-08-06.
        // https://platform.openai.com/docs/models/gpt-4o
        "gpt-4o" | "gpt-4o-2024-08-06" => Some(ModelInfo {
            context_window: 128_000,
            max_output_tokens: 16_384,
        }),

        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-05-13
        "gpt-4o-2024-05-13" => Some(ModelInfo {
            context_window: 128_000,
            max_output_tokens: 4_096,
        }),

        // https://platform.openai.com/docs/models/gpt-4o?snapshot=gpt-4o-2024-11-20
        "gpt-4o-2024-11-20" => Some(ModelInfo {
            context_window: 128_000,
            max_output_tokens: 16_384,
        }),

        // https://platform.openai.com/docs/models/gpt-3.5-turbo
        "gpt-3.5-turbo" => Some(ModelInfo {
            context_window: 16_385,
            max_output_tokens: 4_096,
        }),

        // GPT-5's input context is 272k; its 128k output budget is
        // accounted for separately (see #3190).
        "gpt-5" => Some(ModelInfo {
            context_window: 272_000,
            max_output_tokens: 128_000,
        }),

        _ if slug.starts_with("codex-") => Some(ModelInfo {
            context_window: 272_000,
            max_output_tokens: 128_000,
        }),

        _ => None,
    }
}
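
For reference, a hedged sketch of how a caller might turn ModelInfo into a
remaining-context percentage. The helper name and the BASELINE_TOKENS value
are hypothetical, standing in for the hardcoded "safer constant" the commit
message mentions; this is not the crate's actual API:

// Hypothetical placeholder for the hardcoded initial-prompt baseline;
// the real constant lives elsewhere in the crate and is not shown here.
const BASELINE_TOKENS: u64 = 12_000;

/// Illustrative helper (not in this file): percentage of the input window
/// still available once the baseline is reserved up front.
fn percent_remaining(info: &ModelInfo, tokens_used: u64) -> f64 {
    let usable = info.context_window.saturating_sub(BASELINE_TOKENS);
    let remaining = usable.saturating_sub(tokens_used);
    100.0 * remaining as f64 / usable.max(1) as f64
}

With gpt-5's 272_000-token window, percent_remaining(&info, 130_000) reports
50%: 260_000 usable tokens after the baseline, 130_000 of them still free.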