Forward Rate limits to the UI (#3965)

We currently get information about rate limits in the response headers.
We want to forward them to the clients to have better transparency.
UI/UX plans have been discussed and this information is needed.
This commit is contained in:
Ahmed Ibrahim
2025-09-20 21:26:16 -07:00
committed by GitHub
parent 42d335deb8
commit 04504d8218
6 changed files with 192 additions and 12 deletions

View File

@@ -98,6 +98,7 @@ use crate::protocol::ListCustomPromptsResponseEvent;
use crate::protocol::Op;
use crate::protocol::PatchApplyBeginEvent;
use crate::protocol::PatchApplyEndEvent;
use crate::protocol::RateLimitSnapshotEvent;
use crate::protocol::ReviewDecision;
use crate::protocol::ReviewOutputEvent;
use crate::protocol::SandboxPolicy;
@@ -105,6 +106,7 @@ use crate::protocol::SessionConfiguredEvent;
use crate::protocol::StreamErrorEvent;
use crate::protocol::Submission;
use crate::protocol::TaskCompleteEvent;
use crate::protocol::TokenCountEvent;
use crate::protocol::TokenUsage;
use crate::protocol::TokenUsageInfo;
use crate::protocol::TurnDiffEvent;
@@ -257,6 +259,7 @@ struct State {
pending_input: Vec<ResponseInputItem>,
history: ConversationHistory,
token_info: Option<TokenUsageInfo>,
latest_rate_limits: Option<RateLimitSnapshotEvent>,
}
/// Context for an initialized model agent
@@ -738,16 +741,30 @@ impl Session {
async fn update_token_usage_info(
&self,
turn_context: &TurnContext,
token_usage: &Option<TokenUsage>,
) -> Option<TokenUsageInfo> {
token_usage: Option<&TokenUsage>,
) {
let mut state = self.state.lock().await;
let info = TokenUsageInfo::new_or_append(
&state.token_info,
token_usage,
turn_context.client.get_model_context_window(),
);
state.token_info = info.clone();
info
if let Some(token_usage) = token_usage {
let info = TokenUsageInfo::new_or_append(
&state.token_info,
&Some(token_usage.clone()),
turn_context.client.get_model_context_window(),
);
state.token_info = info;
}
}
async fn update_rate_limits(&self, new_rate_limits: RateLimitSnapshotEvent) {
let mut state = self.state.lock().await;
state.latest_rate_limits = Some(new_rate_limits);
}
async fn get_token_count_event(&self) -> TokenCountEvent {
let state = self.state.lock().await;
TokenCountEvent {
info: state.token_info.clone(),
rate_limits: state.latest_rate_limits.clone(),
}
}
/// Record a user input item to conversation history and also persist a
@@ -2136,17 +2153,22 @@ async fn try_run_turn(
})
.await;
}
ResponseEvent::RateLimits(snapshot) => {
// Update internal state with latest rate limits, but defer sending until
// token usage is available to avoid duplicate TokenCount events.
sess.update_rate_limits(snapshot).await;
}
ResponseEvent::Completed {
response_id: _,
token_usage,
} => {
let info = sess
.update_token_usage_info(turn_context, &token_usage)
sess.update_token_usage_info(turn_context, token_usage.as_ref())
.await;
let token_event = sess.get_token_count_event().await;
let _ = sess
.send_event(Event {
id: sub_id.to_string(),
msg: EventMsg::TokenCount(crate::protocol::TokenCountEvent { info }),
msg: EventMsg::TokenCount(token_event),
})
.await;