Move token usage/context information to session level (#3221)
Move context information into the main loop so it can be used to interrupt the loop or start auto-compaction.
This commit is contained in:
@@ -417,9 +417,9 @@ pub enum EventMsg {
|
||||
/// Agent has completed all actions
|
||||
TaskComplete(TaskCompleteEvent),
|
||||
|
||||
/// Token count event, sent periodically to report the number of tokens
|
||||
/// used in the current session.
|
||||
TokenCount(TokenUsage),
|
||||
/// Usage update for the current session, including totals and last turn.
|
||||
/// Optional means unknown — UIs should not display when `None`.
|
||||
TokenCount(TokenCountEvent),
|
||||
|
||||
/// Agent text output message
|
||||
AgentMessage(AgentMessageEvent),
|
||||
@@ -521,12 +521,54 @@ pub struct TaskStartedEvent {
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, Default)]
|
||||
pub struct TokenUsage {
|
||||
pub input_tokens: u64,
|
||||
pub cached_input_tokens: Option<u64>,
|
||||
pub cached_input_tokens: u64,
|
||||
pub output_tokens: u64,
|
||||
pub reasoning_output_tokens: Option<u64>,
|
||||
pub reasoning_output_tokens: u64,
|
||||
pub total_tokens: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct TokenUsageInfo {
|
||||
pub total_token_usage: TokenUsage,
|
||||
pub last_token_usage: TokenUsage,
|
||||
pub model_context_window: Option<u64>,
|
||||
}
|
||||
|
||||
impl TokenUsageInfo {
|
||||
pub fn new_or_append(
|
||||
info: &Option<TokenUsageInfo>,
|
||||
last: &Option<TokenUsage>,
|
||||
model_context_window: Option<u64>,
|
||||
) -> Option<Self> {
|
||||
if info.is_none() && last.is_none() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut info = match info {
|
||||
Some(info) => info.clone(),
|
||||
None => Self {
|
||||
total_token_usage: TokenUsage::default(),
|
||||
last_token_usage: TokenUsage::default(),
|
||||
model_context_window,
|
||||
},
|
||||
};
|
||||
if let Some(last) = last {
|
||||
info.append_last_usage(last);
|
||||
}
|
||||
Some(info)
|
||||
}
|
||||
|
||||
pub fn append_last_usage(&mut self, last: &TokenUsage) {
|
||||
self.total_token_usage.add_assign(last);
|
||||
self.last_token_usage = last.clone();
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct TokenCountEvent {
|
||||
pub info: Option<TokenUsageInfo>,
|
||||
}
|
||||
|
||||
// Includes prompts, tools and space to call compact.
|
||||
const BASELINE_TOKENS: u64 = 12000;
|
||||
|
||||
@@ -536,7 +578,7 @@ impl TokenUsage {
|
||||
}
|
||||
|
||||
pub fn cached_input(&self) -> u64 {
|
||||
self.cached_input_tokens.unwrap_or(0)
|
||||
self.cached_input_tokens
|
||||
}
|
||||
|
||||
pub fn non_cached_input(&self) -> u64 {
|
||||
@@ -554,7 +596,7 @@ impl TokenUsage {
|
||||
/// This will be off for the current turn and pending function calls.
|
||||
pub fn tokens_in_context_window(&self) -> u64 {
|
||||
self.total_tokens
|
||||
.saturating_sub(self.reasoning_output_tokens.unwrap_or(0))
|
||||
.saturating_sub(self.reasoning_output_tokens)
|
||||
}
|
||||
|
||||
/// Estimate the remaining user-controllable percentage of the model's context window.
|
||||
@@ -579,6 +621,15 @@ impl TokenUsage {
|
||||
let remaining = effective_window.saturating_sub(used);
|
||||
((remaining as f32 / effective_window as f32) * 100.0).clamp(0.0, 100.0) as u8
|
||||
}
|
||||
|
||||
/// In-place element-wise sum of token counts.
|
||||
pub fn add_assign(&mut self, other: &TokenUsage) {
|
||||
self.input_tokens += other.input_tokens;
|
||||
self.cached_input_tokens += other.cached_input_tokens;
|
||||
self.output_tokens += other.output_tokens;
|
||||
self.reasoning_output_tokens += other.reasoning_output_tokens;
|
||||
self.total_tokens += other.total_tokens;
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
@@ -606,10 +657,11 @@ impl fmt::Display for FinalOutput {
|
||||
String::new()
|
||||
},
|
||||
token_usage.output_tokens,
|
||||
token_usage
|
||||
.reasoning_output_tokens
|
||||
.map(|r| format!(" (reasoning {r})"))
|
||||
.unwrap_or_default()
|
||||
if token_usage.reasoning_output_tokens > 0 {
|
||||
format!(" (reasoning {})", token_usage.reasoning_output_tokens)
|
||||
} else {
|
||||
String::new()
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user