Add OSS model info (#1860)

Add somewhat arbitrarily chosen context window and max output token limits for the gpt-oss-20b and gpt-oss-120b models.
This commit is contained in:
easong-openai
2025-08-05 22:35:00 -07:00
committed by GitHub
parent 966d957faf
commit f8d70d67b6

View File

@@ -14,10 +14,19 @@ pub(crate) struct ModelInfo {
pub(crate) max_output_tokens: u64,
}
/// Note: details such as what a model like gpt-4o is aliased to may be out
/// of date.
pub(crate) fn get_model_info(model_family: &ModelFamily) -> Option<ModelInfo> {
match model_family.slug.as_str() {
// OSS models have a 128k shared token pool.
// Arbitrarily splitting it: 3/4 input context, 1/4 output.
// https://openai.com/index/gpt-oss-model-card/
"gpt-oss-20b" => Some(ModelInfo {
context_window: 96_000,
max_output_tokens: 32_000,
}),
"gpt-oss-120b" => Some(ModelInfo {
context_window: 96_000,
max_output_tokens: 32_000,
}),
// https://platform.openai.com/docs/models/o3
"o3" => Some(ModelInfo {
context_window: 200_000,