Forward Rate limits to the UI (#3965)
We currently get information about rate limits in the response headers. We want to forward them to the clients to have better transparency. UI/UX plans have been discussed and this information is needed.
This commit is contained in:
@@ -98,6 +98,7 @@ use crate::protocol::ListCustomPromptsResponseEvent;
|
||||
use crate::protocol::Op;
|
||||
use crate::protocol::PatchApplyBeginEvent;
|
||||
use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::RateLimitSnapshotEvent;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::ReviewOutputEvent;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
@@ -105,6 +106,7 @@ use crate::protocol::SessionConfiguredEvent;
|
||||
use crate::protocol::StreamErrorEvent;
|
||||
use crate::protocol::Submission;
|
||||
use crate::protocol::TaskCompleteEvent;
|
||||
use crate::protocol::TokenCountEvent;
|
||||
use crate::protocol::TokenUsage;
|
||||
use crate::protocol::TokenUsageInfo;
|
||||
use crate::protocol::TurnDiffEvent;
|
||||
@@ -257,6 +259,7 @@ struct State {
|
||||
pending_input: Vec<ResponseInputItem>,
|
||||
history: ConversationHistory,
|
||||
token_info: Option<TokenUsageInfo>,
|
||||
latest_rate_limits: Option<RateLimitSnapshotEvent>,
|
||||
}
|
||||
|
||||
/// Context for an initialized model agent
|
||||
@@ -738,16 +741,30 @@ impl Session {
|
||||
async fn update_token_usage_info(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
token_usage: &Option<TokenUsage>,
|
||||
) -> Option<TokenUsageInfo> {
|
||||
token_usage: Option<&TokenUsage>,
|
||||
) {
|
||||
let mut state = self.state.lock().await;
|
||||
let info = TokenUsageInfo::new_or_append(
|
||||
&state.token_info,
|
||||
token_usage,
|
||||
turn_context.client.get_model_context_window(),
|
||||
);
|
||||
state.token_info = info.clone();
|
||||
info
|
||||
if let Some(token_usage) = token_usage {
|
||||
let info = TokenUsageInfo::new_or_append(
|
||||
&state.token_info,
|
||||
&Some(token_usage.clone()),
|
||||
turn_context.client.get_model_context_window(),
|
||||
);
|
||||
state.token_info = info;
|
||||
}
|
||||
}
|
||||
|
||||
async fn update_rate_limits(&self, new_rate_limits: RateLimitSnapshotEvent) {
|
||||
let mut state = self.state.lock().await;
|
||||
state.latest_rate_limits = Some(new_rate_limits);
|
||||
}
|
||||
|
||||
async fn get_token_count_event(&self) -> TokenCountEvent {
|
||||
let state = self.state.lock().await;
|
||||
TokenCountEvent {
|
||||
info: state.token_info.clone(),
|
||||
rate_limits: state.latest_rate_limits.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a user input item to conversation history and also persist a
|
||||
@@ -2136,17 +2153,22 @@ async fn try_run_turn(
|
||||
})
|
||||
.await;
|
||||
}
|
||||
ResponseEvent::RateLimits(snapshot) => {
|
||||
// Update internal state with latest rate limits, but defer sending until
|
||||
// token usage is available to avoid duplicate TokenCount events.
|
||||
sess.update_rate_limits(snapshot).await;
|
||||
}
|
||||
ResponseEvent::Completed {
|
||||
response_id: _,
|
||||
token_usage,
|
||||
} => {
|
||||
let info = sess
|
||||
.update_token_usage_info(turn_context, &token_usage)
|
||||
sess.update_token_usage_info(turn_context, token_usage.as_ref())
|
||||
.await;
|
||||
let token_event = sess.get_token_count_event().await;
|
||||
let _ = sess
|
||||
.send_event(Event {
|
||||
id: sub_id.to_string(),
|
||||
msg: EventMsg::TokenCount(crate::protocol::TokenCountEvent { info }),
|
||||
msg: EventMsg::TokenCount(token_event),
|
||||
})
|
||||
.await;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user