Move token usage/context information to session level (#3221)

Move context information into the main loop so it can be used to
interrupt the loop or start auto-compaction.
This commit is contained in:
pakrym-oai
2025-09-06 08:19:23 -07:00
committed by GitHub
parent 70a6d4b1b4
commit 0269096229
9 changed files with 151 additions and 117 deletions

View File

@@ -398,9 +398,15 @@ impl From<ResponseCompletedUsage> for TokenUsage {
fn from(val: ResponseCompletedUsage) -> Self {
TokenUsage {
input_tokens: val.input_tokens,
cached_input_tokens: val.input_tokens_details.map(|d| d.cached_tokens),
cached_input_tokens: val
.input_tokens_details
.map(|d| d.cached_tokens)
.unwrap_or(0),
output_tokens: val.output_tokens,
reasoning_output_tokens: val.output_tokens_details.map(|d| d.reasoning_tokens),
reasoning_output_tokens: val
.output_tokens_details
.map(|d| d.reasoning_tokens)
.unwrap_or(0),
total_tokens: val.total_tokens,
}
}

View File

@@ -99,6 +99,7 @@ use crate::protocol::SessionConfiguredEvent;
use crate::protocol::StreamErrorEvent;
use crate::protocol::Submission;
use crate::protocol::TaskCompleteEvent;
use crate::protocol::TokenUsageInfo;
use crate::protocol::TurnDiffEvent;
use crate::protocol::WebSearchBeginEvent;
use crate::rollout::RolloutRecorder;
@@ -261,6 +262,7 @@ struct State {
pending_approvals: HashMap<String, oneshot::Sender<ReviewDecision>>,
pending_input: Vec<ResponseInputItem>,
history: ConversationHistory,
token_info: Option<TokenUsageInfo>,
}
/// Context for an initialized model agent
@@ -1767,15 +1769,23 @@ async fn try_run_turn(
response_id: _,
token_usage,
} => {
if let Some(token_usage) = token_usage {
sess.tx_event
.send(Event {
id: sub_id.to_string(),
msg: EventMsg::TokenCount(token_usage),
})
.await
.ok();
}
let info = {
let mut st = sess.state.lock_unchecked();
let info = TokenUsageInfo::new_or_append(
&st.token_info,
&token_usage,
turn_context.client.get_model_context_window(),
);
st.token_info = info.clone();
info
};
sess.tx_event
.send(Event {
id: sub_id.to_string(),
msg: EventMsg::TokenCount(crate::protocol::TokenCountEvent { info }),
})
.await
.ok();
let unified_diff = turn_diff_tracker.get_unified_diff();
if let Ok(Some(unified_diff)) = unified_diff {
@@ -2841,13 +2851,21 @@ async fn drain_to_completed(
response_id: _,
token_usage,
}) => {
// some providers don't return token usage, so we default
// TODO: consider approximate token usage
let token_usage = token_usage.unwrap_or_default();
let info = {
let mut st = sess.state.lock_unchecked();
let info = TokenUsageInfo::new_or_append(
&st.token_info,
&token_usage,
turn_context.client.get_model_context_window(),
);
st.token_info = info.clone();
info
};
sess.tx_event
.send(Event {
id: sub_id.to_string(),
msg: EventMsg::TokenCount(token_usage),
msg: EventMsg::TokenCount(crate::protocol::TokenCountEvent { info }),
})
.await
.ok();