Move token usage/context information to session level (#3221)

Move context information into the main loop so it can be used to
interrupt the loop or start auto-compaction.
This commit is contained in:
pakrym-oai
2025-09-06 08:19:23 -07:00
committed by GitHub
parent 70a6d4b1b4
commit 0269096229
9 changed files with 151 additions and 117 deletions

View File

@@ -417,9 +417,9 @@ pub enum EventMsg {
/// Agent has completed all actions
TaskComplete(TaskCompleteEvent),
/// Token count event, sent periodically to report the number of tokens
/// used in the current session.
TokenCount(TokenUsage),
/// Usage update for the current session, including totals and last turn.
/// Optional means unknown — UIs should not display when `None`.
TokenCount(TokenCountEvent),
/// Agent text output message
AgentMessage(AgentMessageEvent),
@@ -521,12 +521,54 @@ pub struct TaskStartedEvent {
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
pub struct TokenUsage {
pub input_tokens: u64,
pub cached_input_tokens: Option<u64>,
pub cached_input_tokens: u64,
pub output_tokens: u64,
pub reasoning_output_tokens: Option<u64>,
pub reasoning_output_tokens: u64,
pub total_tokens: u64,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct TokenUsageInfo {
pub total_token_usage: TokenUsage,
pub last_token_usage: TokenUsage,
pub model_context_window: Option<u64>,
}
impl TokenUsageInfo {
pub fn new_or_append(
info: &Option<TokenUsageInfo>,
last: &Option<TokenUsage>,
model_context_window: Option<u64>,
) -> Option<Self> {
if info.is_none() && last.is_none() {
return None;
}
let mut info = match info {
Some(info) => info.clone(),
None => Self {
total_token_usage: TokenUsage::default(),
last_token_usage: TokenUsage::default(),
model_context_window,
},
};
if let Some(last) = last {
info.append_last_usage(last);
}
Some(info)
}
pub fn append_last_usage(&mut self, last: &TokenUsage) {
self.total_token_usage.add_assign(last);
self.last_token_usage = last.clone();
}
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct TokenCountEvent {
pub info: Option<TokenUsageInfo>,
}
// Includes prompts, tools and space to call compact.
const BASELINE_TOKENS: u64 = 12000;
@@ -536,7 +578,7 @@ impl TokenUsage {
}
pub fn cached_input(&self) -> u64 {
self.cached_input_tokens.unwrap_or(0)
self.cached_input_tokens
}
pub fn non_cached_input(&self) -> u64 {
@@ -554,7 +596,7 @@ impl TokenUsage {
/// This will be off for the current turn and pending function calls.
pub fn tokens_in_context_window(&self) -> u64 {
self.total_tokens
.saturating_sub(self.reasoning_output_tokens.unwrap_or(0))
.saturating_sub(self.reasoning_output_tokens)
}
/// Estimate the remaining user-controllable percentage of the model's context window.
@@ -579,6 +621,15 @@ impl TokenUsage {
let remaining = effective_window.saturating_sub(used);
((remaining as f32 / effective_window as f32) * 100.0).clamp(0.0, 100.0) as u8
}
/// In-place element-wise sum of token counts.
pub fn add_assign(&mut self, other: &TokenUsage) {
self.input_tokens += other.input_tokens;
self.cached_input_tokens += other.cached_input_tokens;
self.output_tokens += other.output_tokens;
self.reasoning_output_tokens += other.reasoning_output_tokens;
self.total_tokens += other.total_tokens;
}
}
#[derive(Debug, Clone, Deserialize, Serialize)]
@@ -606,10 +657,11 @@ impl fmt::Display for FinalOutput {
String::new()
},
token_usage.output_tokens,
token_usage
.reasoning_output_tokens
.map(|r| format!(" (reasoning {r})"))
.unwrap_or_default()
if token_usage.reasoning_output_tokens > 0 {
format!(" (reasoning {})", token_usage.reasoning_output_tokens)
} else {
String::new()
}
)
}
}