Include reasoning tokens in the context window calculation (#6161)
This value is used to determine whether mid-turn compaction is required. Reasoning items are only excluded between turns (and soon will start to be preserved even across turns), so it's incorrect to subtract reasoning_output_tokens mid-turn. This will result in higher values reported between turns, but we are also looking into preserving reasoning items for the entire conversation to improve performance and caching.
This commit is contained in:
@@ -812,12 +812,8 @@ impl TokenUsage {
|
||||
(self.non_cached_input() + self.output_tokens.max(0)).max(0)
|
||||
}
|
||||
|
||||
/// For estimating what % of the model's context window is used, we need to account
|
||||
/// for reasoning output tokens from prior turns being dropped from the context window.
|
||||
/// We approximate this here by subtracting reasoning output tokens from the total.
|
||||
/// This will be off for the current turn and pending function calls.
|
||||
pub fn tokens_in_context_window(&self) -> i64 {
|
||||
(self.total_tokens - self.reasoning_output_tokens).max(0)
|
||||
self.total_tokens
|
||||
}
|
||||
|
||||
/// Estimate the remaining user-controllable percentage of the model's context window.
|
||||
|
||||
@@ -17,7 +17,7 @@ expression: sanitized
|
||||
│ Agents.md: <none> │
|
||||
│ │
|
||||
│ Token usage: 1.9K total (1K input + 900 output) │
|
||||
│ Context window: 100% left (2.1K used / 272K) │
|
||||
│ Context window: 100% left (2.25K used / 272K) │
|
||||
│ 5h limit: [███████████████░░░░░] 72% used (resets 03:14) │
|
||||
│ Weekly limit: [█████████░░░░░░░░░░░] 45% used (resets 03:24) │
|
||||
╰─────────────────────────────────────────────────────────────────────╯
|
||||
|
||||
@@ -17,7 +17,7 @@ expression: sanitized
|
||||
│ Agents.md: <none> │
|
||||
│ │
|
||||
│ Token usage: 1.9K total (1K input + 900 output) │
|
||||
│ Context window: 100% left (2.1K used / 272K) │
|
||||
│ Context window: 100% left (2.25K used / 272K) │
|
||||
│ 5h limit: [███████████████░░░░░] 72% used (resets 03:14) │
|
||||
│ Weekly limit: [████████░░░░░░░░░░░░] 40% used (resets 03:34) │
|
||||
│ Warning: limits may be stale - start new turn to refresh. │
|
||||
|
||||
@@ -19,7 +19,7 @@ expression: sanitized
|
||||
│ Agents.md: <none> │
|
||||
│ │
|
||||
│ Token usage: 1.9K total (1K input + │
|
||||
│ Context window: 100% left (2.1K used / │
|
||||
│ Context window: 100% left (2.25K used / │
|
||||
│ 5h limit: [███████████████░░░░░] │
|
||||
│ (resets 03:14) │
|
||||
╰────────────────────────────────────────────╯
|
||||
|
||||
Reference in New Issue
Block a user