Correctly calculate remaining context size (#3190)

We had two issues with the context size calculation:
1. The `initial_prompt_tokens` calculation based on cache size is not
reliable: a cache miss can set it to a much higher value. For now it is
hardcoded to a safer constant.
2. The input context size for GPT-5 is 272k, not the full context window
(that's where the stuck 33% came from: measured against a 400k window, a
completely full 272k input still shows roughly a third of the window as
remaining).

Both issues are fixed here.
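
To make point 2 concrete, here is a quick sanity check of where the stuck
~33% likely came from. The 400k figure is an assumption based on GPT-5's
published total context window; the variable names are illustrative only.

    fn main() {
        // GPT-5 accepts at most 272k input tokens. If the full 400k context
        // window is used as the denominator, a completely full input context
        // still reports roughly a third of the window as remaining.
        let input_limit: u64 = 272_000;
        let assumed_window: u64 = 400_000; // assumption: the old, too-large figure
        let percent_remaining = (assumed_window - input_limit) * 100 / assumed_window;
        assert_eq!(percent_remaining, 32); // rendered as ~33% in the composer
        println!("{percent_remaining}% shown even though the input is full");
    }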
Author: pakrym-oai (committed via GitHub)
Date:   2025-09-04 16:34:14 -07:00
Parent: b795fbe244
Commit: 7df9e9c664
4 changed files with 12 additions and 32 deletions


@@ -67,15 +67,6 @@ struct TokenUsageInfo {
     total_token_usage: TokenUsage,
     last_token_usage: TokenUsage,
     model_context_window: Option<u64>,
-    /// Baseline token count present in the context before the user's first
-    /// message content is considered. This is used to normalize the
-    /// "context left" percentage so it reflects the portion the user can
-    /// influence rather than fixed prompt overhead (system prompt, tool
-    /// instructions, etc.).
-    ///
-    /// Preferred source is `cached_input_tokens` from the first turn (when
-    /// available), otherwise we fall back to 0.
-    initial_prompt_tokens: u64,
 }
 pub(crate) struct ChatComposer {
@@ -181,17 +172,10 @@ impl ChatComposer {
         last_token_usage: TokenUsage,
         model_context_window: Option<u64>,
     ) {
-        let initial_prompt_tokens = self
-            .token_usage_info
-            .as_ref()
-            .map(|info| info.initial_prompt_tokens)
-            .unwrap_or_else(|| last_token_usage.cached_input_tokens.unwrap_or(0));
         self.token_usage_info = Some(TokenUsageInfo {
             total_token_usage,
             last_token_usage,
             model_context_window,
-            initial_prompt_tokens,
         });
     }
@@ -1302,10 +1286,7 @@ impl WidgetRef for ChatComposer {
         let last_token_usage = &token_usage_info.last_token_usage;
         if let Some(context_window) = token_usage_info.model_context_window {
             let percent_remaining: u8 = if context_window > 0 {
-                last_token_usage.percent_of_context_window_remaining(
-                    context_window,
-                    token_usage_info.initial_prompt_tokens,
-                )
+                last_token_usage.percent_of_context_window_remaining(context_window)
             } else {
                 100
             };
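
For context, a minimal sketch of what the simplified
percent_of_context_window_remaining(context_window) could look like with
the hardcoded baseline the commit message mentions. BASELINE_TOKENS, its
value, and the total_tokens field are assumptions for illustration, not
the actual implementation:

    /// Assumed stand-in for the "safer constant": fixed prompt overhead
    /// (system prompt, tool instructions, etc.) the user cannot reclaim.
    const BASELINE_TOKENS: u64 = 12_000; // hypothetical value

    struct TokenUsage {
        total_tokens: u64, // tokens currently occupying the context window
    }

    impl TokenUsage {
        /// Percent of the user-influenceable window still free. The baseline
        /// is subtracted from both the window size and the current usage so
        /// fixed prompt overhead does not skew the figure.
        fn percent_of_context_window_remaining(&self, context_window: u64) -> u8 {
            if context_window <= BASELINE_TOKENS {
                return 0;
            }
            let effective_window = context_window - BASELINE_TOKENS;
            let used = self.total_tokens.saturating_sub(BASELINE_TOKENS);
            let remaining = effective_window.saturating_sub(used);
            ((remaining as f64 / effective_window as f64) * 100.0) as u8
        }
    }

    fn main() {
        let usage = TokenUsage { total_tokens: 77_000 };
        // GPT-5 input window 272k: (260k - 65k) / 260k = 75%.
        assert_eq!(usage.percent_of_context_window_remaining(272_000), 75);
    }

Subtracting the same baseline from both numerator and denominator keeps the
gauge at 100% right after startup instead of immediately charging the fixed
prompt cost against the user.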