Include reasoning tokens in the context window calculation (#6161)

This value is used to determine whether mid-turn compaction is required.
Reasoning items are only excluded between turns (and will soon start to
be preserved even across turns), so it's incorrect to subtract
reasoning_output_tokens mid-turn.

This will result in higher values being reported between turns, but we are
also looking into preserving reasoning items for the entire conversation to
improve performance and caching.
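
For context, a minimal sketch of the kind of mid-turn check this value feeds. The struct fields mirror the TokenUsage fields visible in the diff below; should_compact, the threshold, and the window size are illustrative assumptions, not the actual codex API.

#[allow(dead_code)]
struct TokenUsage {
    total_tokens: i64,
    reasoning_output_tokens: i64,
}

impl TokenUsage {
    // After this change: report the full total, with no mid-turn subtraction
    // of reasoning output tokens.
    fn tokens_in_context_window(&self) -> i64 {
        self.total_tokens
    }
}

// Hypothetical mid-turn compaction check against a model's context window.
fn should_compact(usage: &TokenUsage, context_window: i64, threshold: f64) -> bool {
    usage.tokens_in_context_window() as f64 >= context_window as f64 * threshold
}

fn main() {
    let usage = TokenUsage { total_tokens: 2_250, reasoning_output_tokens: 150 };
    // e.g. compact once 90% of a 272_000-token window is in use
    println!("{}", should_compact(&usage, 272_000, 0.9)); // prints "false"
}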
Author: pakrym-oai
Date: 2025-11-03 10:02:23 -08:00
Committed by: GitHub
Parent: 7bc3ca9e40
Commit: e5e13479d0

4 changed files with 4 additions and 8 deletions

@@ -812,12 +812,8 @@ impl TokenUsage {
         (self.non_cached_input() + self.output_tokens.max(0)).max(0)
     }
 
-    /// For estimating what % of the model's context window is used, we need to account
-    /// for reasoning output tokens from prior turns being dropped from the context window.
-    /// We approximate this here by subtracting reasoning output tokens from the total.
-    /// This will be off for the current turn and pending function calls.
     pub fn tokens_in_context_window(&self) -> i64 {
-        (self.total_tokens - self.reasoning_output_tokens).max(0)
+        self.total_tokens
     }
 
     /// Estimate the remaining user-controllable percentage of the model's context window.
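
Since the "remaining user-controllable percentage" doc comment is the natural consumer of this value, here is one plausible shape for that calculation, shown only to make the snapshot numbers below easier to read. The function name, the rounding, and the BASELINE_TOKENS value are assumptions for illustration, not the crate's actual constants or code.

// Assumed behavior: tokens up to some fixed baseline (system prompt, tool
// definitions) are not counted against the user; 12_000 is a placeholder.
const BASELINE_TOKENS: i64 = 12_000;

fn percent_of_context_window_remaining(tokens_in_context_window: i64, context_window: i64) -> u8 {
    let usable = (context_window - BASELINE_TOKENS).max(1);
    let used_above_baseline = (tokens_in_context_window - BASELINE_TOKENS).max(0);
    let remaining = (usable - used_above_baseline).max(0) as f64;
    (100.0 * remaining / usable as f64).round() as u8
}

With 2_250 tokens used (below the assumed baseline) in a 272_000-token window this reports 100% left, which is consistent with the snapshots below.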

@@ -17,7 +17,7 @@ expression: sanitized
│ Agents.md: <none> │
│ │
│ Token usage: 1.9K total (1K input + 900 output) │
-│ Context window: 100% left (2.1K used / 272K) │
+│ Context window: 100% left (2.25K used / 272K) │
│ 5h limit: [███████████████░░░░░] 72% used (resets 03:14) │
│ Weekly limit: [█████████░░░░░░░░░░░] 45% used (resets 03:24) │
╰─────────────────────────────────────────────────────────────────────╯
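
A quick sanity check on the snapshot delta, assuming the fixture behind these tests reports total_tokens = 2_250 and reasoning_output_tokens = 150 (values inferred from the 2.25K and 2.1K figures; the separate 1.9K "Token usage" line comes from the non-cached-input-plus-output calculation in the first hunk and is unchanged here). Only the expected strings in the snapshots change.

fn main() {
    let (total_tokens, reasoning_output_tokens): (i64, i64) = (2_250, 150);
    // Old formula: reasoning output subtracted, rendered as 2.1K.
    assert_eq!((total_tokens - reasoning_output_tokens).max(0), 2_100);
    // New formula: the full total, rendered as 2.25K.
    assert_eq!(total_tokens, 2_250);
}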

@@ -17,7 +17,7 @@ expression: sanitized
│ Agents.md: <none> │
│ │
│ Token usage: 1.9K total (1K input + 900 output) │
-│ Context window: 100% left (2.1K used / 272K) │
+│ Context window: 100% left (2.25K used / 272K) │
│ 5h limit: [███████████████░░░░░] 72% used (resets 03:14) │
│ Weekly limit: [████████░░░░░░░░░░░░] 40% used (resets 03:34) │
│ Warning: limits may be stale - start new turn to refresh. │

@@ -19,7 +19,7 @@ expression: sanitized
│ Agents.md: <none> │
│ │
│ Token usage: 1.9K total (1K input + │
-│ Context window: 100% left (2.1K used / │
+│ Context window: 100% left (2.25K used / │
│ 5h limit: [███████████████░░░░░] │
│ (resets 03:14) │
╰────────────────────────────────────────────╯