Send limits when getting rate limited (#4102)
Users need visibility on rate limits when they are rate limited.
This commit is contained in:
@@ -42,7 +42,7 @@ use crate::model_provider_info::ModelProviderInfo;
|
|||||||
use crate::model_provider_info::WireApi;
|
use crate::model_provider_info::WireApi;
|
||||||
use crate::openai_model_info::get_model_info;
|
use crate::openai_model_info::get_model_info;
|
||||||
use crate::openai_tools::create_tools_json_for_responses_api;
|
use crate::openai_tools::create_tools_json_for_responses_api;
|
||||||
use crate::protocol::RateLimitSnapshotEvent;
|
use crate::protocol::RateLimitSnapshot;
|
||||||
use crate::protocol::TokenUsage;
|
use crate::protocol::TokenUsage;
|
||||||
use crate::token_data::PlanType;
|
use crate::token_data::PlanType;
|
||||||
use crate::util::backoff;
|
use crate::util::backoff;
|
||||||
@@ -330,6 +330,7 @@ impl ModelClient {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if status == StatusCode::TOO_MANY_REQUESTS {
|
if status == StatusCode::TOO_MANY_REQUESTS {
|
||||||
|
let rate_limit_snapshot = parse_rate_limit_snapshot(res.headers());
|
||||||
let body = res.json::<ErrorResponse>().await.ok();
|
let body = res.json::<ErrorResponse>().await.ok();
|
||||||
if let Some(ErrorResponse { error }) = body {
|
if let Some(ErrorResponse { error }) = body {
|
||||||
if error.r#type.as_deref() == Some("usage_limit_reached") {
|
if error.r#type.as_deref() == Some("usage_limit_reached") {
|
||||||
@@ -343,6 +344,7 @@ impl ModelClient {
|
|||||||
return Err(CodexErr::UsageLimitReached(UsageLimitReachedError {
|
return Err(CodexErr::UsageLimitReached(UsageLimitReachedError {
|
||||||
plan_type,
|
plan_type,
|
||||||
resets_in_seconds,
|
resets_in_seconds,
|
||||||
|
rate_limits: rate_limit_snapshot,
|
||||||
}));
|
}));
|
||||||
} else if error.r#type.as_deref() == Some("usage_not_included") {
|
} else if error.r#type.as_deref() == Some("usage_not_included") {
|
||||||
return Err(CodexErr::UsageNotIncluded);
|
return Err(CodexErr::UsageNotIncluded);
|
||||||
@@ -485,7 +487,7 @@ fn attach_item_ids(payload_json: &mut Value, original_items: &[ResponseItem]) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshotEvent> {
|
fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshot> {
|
||||||
let primary_used_percent = parse_header_f64(headers, "x-codex-primary-used-percent")?;
|
let primary_used_percent = parse_header_f64(headers, "x-codex-primary-used-percent")?;
|
||||||
let secondary_used_percent = parse_header_f64(headers, "x-codex-secondary-used-percent")?;
|
let secondary_used_percent = parse_header_f64(headers, "x-codex-secondary-used-percent")?;
|
||||||
let primary_to_secondary_ratio_percent =
|
let primary_to_secondary_ratio_percent =
|
||||||
@@ -493,7 +495,7 @@ fn parse_rate_limit_snapshot(headers: &HeaderMap) -> Option<RateLimitSnapshotEve
|
|||||||
let primary_window_minutes = parse_header_u64(headers, "x-codex-primary-window-minutes")?;
|
let primary_window_minutes = parse_header_u64(headers, "x-codex-primary-window-minutes")?;
|
||||||
let secondary_window_minutes = parse_header_u64(headers, "x-codex-secondary-window-minutes")?;
|
let secondary_window_minutes = parse_header_u64(headers, "x-codex-secondary-window-minutes")?;
|
||||||
|
|
||||||
Some(RateLimitSnapshotEvent {
|
Some(RateLimitSnapshot {
|
||||||
primary_used_percent,
|
primary_used_percent,
|
||||||
secondary_used_percent,
|
secondary_used_percent,
|
||||||
primary_to_secondary_ratio_percent,
|
primary_to_secondary_ratio_percent,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use crate::error::Result;
|
use crate::error::Result;
|
||||||
use crate::model_family::ModelFamily;
|
use crate::model_family::ModelFamily;
|
||||||
use crate::openai_tools::OpenAiTool;
|
use crate::openai_tools::OpenAiTool;
|
||||||
use crate::protocol::RateLimitSnapshotEvent;
|
use crate::protocol::RateLimitSnapshot;
|
||||||
use crate::protocol::TokenUsage;
|
use crate::protocol::TokenUsage;
|
||||||
use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
|
use codex_apply_patch::APPLY_PATCH_TOOL_INSTRUCTIONS;
|
||||||
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
|
use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
|
||||||
@@ -83,7 +83,7 @@ pub enum ResponseEvent {
|
|||||||
WebSearchCallBegin {
|
WebSearchCallBegin {
|
||||||
call_id: String,
|
call_id: String,
|
||||||
},
|
},
|
||||||
RateLimits(RateLimitSnapshotEvent),
|
RateLimits(RateLimitSnapshot),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Serialize)]
|
#[derive(Debug, Serialize)]
|
||||||
|
|||||||
@@ -100,7 +100,7 @@ use crate::protocol::ListCustomPromptsResponseEvent;
|
|||||||
use crate::protocol::Op;
|
use crate::protocol::Op;
|
||||||
use crate::protocol::PatchApplyBeginEvent;
|
use crate::protocol::PatchApplyBeginEvent;
|
||||||
use crate::protocol::PatchApplyEndEvent;
|
use crate::protocol::PatchApplyEndEvent;
|
||||||
use crate::protocol::RateLimitSnapshotEvent;
|
use crate::protocol::RateLimitSnapshot;
|
||||||
use crate::protocol::ReviewDecision;
|
use crate::protocol::ReviewDecision;
|
||||||
use crate::protocol::ReviewOutputEvent;
|
use crate::protocol::ReviewOutputEvent;
|
||||||
use crate::protocol::SandboxPolicy;
|
use crate::protocol::SandboxPolicy;
|
||||||
@@ -261,7 +261,7 @@ struct State {
|
|||||||
pending_input: Vec<ResponseInputItem>,
|
pending_input: Vec<ResponseInputItem>,
|
||||||
history: ConversationHistory,
|
history: ConversationHistory,
|
||||||
token_info: Option<TokenUsageInfo>,
|
token_info: Option<TokenUsageInfo>,
|
||||||
latest_rate_limits: Option<RateLimitSnapshotEvent>,
|
latest_rate_limits: Option<RateLimitSnapshot>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Context for an initialized model agent
|
/// Context for an initialized model agent
|
||||||
@@ -739,31 +739,42 @@ impl Session {
|
|||||||
|
|
||||||
async fn update_token_usage_info(
|
async fn update_token_usage_info(
|
||||||
&self,
|
&self,
|
||||||
|
sub_id: &str,
|
||||||
turn_context: &TurnContext,
|
turn_context: &TurnContext,
|
||||||
token_usage: Option<&TokenUsage>,
|
token_usage: Option<&TokenUsage>,
|
||||||
) {
|
) {
|
||||||
let mut state = self.state.lock().await;
|
{
|
||||||
if let Some(token_usage) = token_usage {
|
let mut state = self.state.lock().await;
|
||||||
let info = TokenUsageInfo::new_or_append(
|
if let Some(token_usage) = token_usage {
|
||||||
&state.token_info,
|
let info = TokenUsageInfo::new_or_append(
|
||||||
&Some(token_usage.clone()),
|
&state.token_info,
|
||||||
turn_context.client.get_model_context_window(),
|
&Some(token_usage.clone()),
|
||||||
);
|
turn_context.client.get_model_context_window(),
|
||||||
state.token_info = info;
|
);
|
||||||
|
state.token_info = info;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
self.send_token_count_event(sub_id).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn update_rate_limits(&self, new_rate_limits: RateLimitSnapshotEvent) {
|
async fn update_rate_limits(&self, sub_id: &str, new_rate_limits: RateLimitSnapshot) {
|
||||||
let mut state = self.state.lock().await;
|
{
|
||||||
state.latest_rate_limits = Some(new_rate_limits);
|
let mut state = self.state.lock().await;
|
||||||
|
state.latest_rate_limits = Some(new_rate_limits);
|
||||||
|
}
|
||||||
|
self.send_token_count_event(sub_id).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn get_token_count_event(&self) -> TokenCountEvent {
|
async fn send_token_count_event(&self, sub_id: &str) {
|
||||||
let state = self.state.lock().await;
|
let (info, rate_limits) = {
|
||||||
TokenCountEvent {
|
let state = self.state.lock().await;
|
||||||
info: state.token_info.clone(),
|
(state.token_info.clone(), state.latest_rate_limits.clone())
|
||||||
rate_limits: state.latest_rate_limits.clone(),
|
};
|
||||||
}
|
let event = Event {
|
||||||
|
id: sub_id.to_string(),
|
||||||
|
msg: EventMsg::TokenCount(TokenCountEvent { info, rate_limits }),
|
||||||
|
};
|
||||||
|
self.send_event(event).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Record a user input item to conversation history and also persist a
|
/// Record a user input item to conversation history and also persist a
|
||||||
@@ -1957,9 +1968,14 @@ async fn run_turn(
|
|||||||
Ok(output) => return Ok(output),
|
Ok(output) => return Ok(output),
|
||||||
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
|
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
|
||||||
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
|
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
|
||||||
Err(e @ (CodexErr::UsageLimitReached(_) | CodexErr::UsageNotIncluded)) => {
|
Err(CodexErr::UsageLimitReached(e)) => {
|
||||||
return Err(e);
|
let rate_limits = e.rate_limits.clone();
|
||||||
|
if let Some(rate_limits) = rate_limits {
|
||||||
|
sess.update_rate_limits(&sub_id, rate_limits).await;
|
||||||
|
}
|
||||||
|
return Err(CodexErr::UsageLimitReached(e));
|
||||||
}
|
}
|
||||||
|
Err(CodexErr::UsageNotIncluded) => return Err(CodexErr::UsageNotIncluded),
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
// Use the configured provider-specific stream retry budget.
|
// Use the configured provider-specific stream retry budget.
|
||||||
let max_retries = turn_context.client.get_provider().stream_max_retries();
|
let max_retries = turn_context.client.get_provider().stream_max_retries();
|
||||||
@@ -2132,20 +2148,13 @@ async fn try_run_turn(
|
|||||||
ResponseEvent::RateLimits(snapshot) => {
|
ResponseEvent::RateLimits(snapshot) => {
|
||||||
// Update internal state with latest rate limits, but defer sending until
|
// Update internal state with latest rate limits, but defer sending until
|
||||||
// token usage is available to avoid duplicate TokenCount events.
|
// token usage is available to avoid duplicate TokenCount events.
|
||||||
sess.update_rate_limits(snapshot).await;
|
sess.update_rate_limits(sub_id, snapshot).await;
|
||||||
}
|
}
|
||||||
ResponseEvent::Completed {
|
ResponseEvent::Completed {
|
||||||
response_id: _,
|
response_id: _,
|
||||||
token_usage,
|
token_usage,
|
||||||
} => {
|
} => {
|
||||||
sess.update_token_usage_info(turn_context, token_usage.as_ref())
|
sess.update_token_usage_info(sub_id, turn_context, token_usage.as_ref())
|
||||||
.await;
|
|
||||||
let token_event = sess.get_token_count_event().await;
|
|
||||||
let _ = sess
|
|
||||||
.send_event(Event {
|
|
||||||
id: sub_id.to_string(),
|
|
||||||
msg: EventMsg::TokenCount(token_event),
|
|
||||||
})
|
|
||||||
.await;
|
.await;
|
||||||
|
|
||||||
let unified_diff = turn_diff_tracker.get_unified_diff();
|
let unified_diff = turn_diff_tracker.get_unified_diff();
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ use crate::exec::ExecToolCallOutput;
|
|||||||
use crate::token_data::KnownPlan;
|
use crate::token_data::KnownPlan;
|
||||||
use crate::token_data::PlanType;
|
use crate::token_data::PlanType;
|
||||||
use codex_protocol::mcp_protocol::ConversationId;
|
use codex_protocol::mcp_protocol::ConversationId;
|
||||||
|
use codex_protocol::protocol::RateLimitSnapshot;
|
||||||
use reqwest::StatusCode;
|
use reqwest::StatusCode;
|
||||||
use serde_json;
|
use serde_json;
|
||||||
use std::io;
|
use std::io;
|
||||||
@@ -138,6 +139,7 @@ pub enum CodexErr {
|
|||||||
pub struct UsageLimitReachedError {
|
pub struct UsageLimitReachedError {
|
||||||
pub(crate) plan_type: Option<PlanType>,
|
pub(crate) plan_type: Option<PlanType>,
|
||||||
pub(crate) resets_in_seconds: Option<u64>,
|
pub(crate) resets_in_seconds: Option<u64>,
|
||||||
|
pub(crate) rate_limits: Option<RateLimitSnapshot>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for UsageLimitReachedError {
|
impl std::fmt::Display for UsageLimitReachedError {
|
||||||
@@ -266,11 +268,22 @@ pub fn get_error_message_ui(e: &CodexErr) -> String {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
fn rate_limit_snapshot() -> RateLimitSnapshot {
|
||||||
|
RateLimitSnapshot {
|
||||||
|
primary_used_percent: 0.5,
|
||||||
|
secondary_used_percent: 0.3,
|
||||||
|
primary_to_secondary_ratio_percent: 0.7,
|
||||||
|
primary_window_minutes: 60,
|
||||||
|
secondary_window_minutes: 120,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn usage_limit_reached_error_formats_plus_plan() {
|
fn usage_limit_reached_error_formats_plus_plan() {
|
||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
plan_type: Some(PlanType::Known(KnownPlan::Plus)),
|
plan_type: Some(PlanType::Known(KnownPlan::Plus)),
|
||||||
resets_in_seconds: None,
|
resets_in_seconds: None,
|
||||||
|
rate_limits: Some(rate_limit_snapshot()),
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
@@ -283,6 +296,7 @@ mod tests {
|
|||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
plan_type: Some(PlanType::Known(KnownPlan::Free)),
|
plan_type: Some(PlanType::Known(KnownPlan::Free)),
|
||||||
resets_in_seconds: Some(3600),
|
resets_in_seconds: Some(3600),
|
||||||
|
rate_limits: Some(rate_limit_snapshot()),
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
@@ -295,6 +309,7 @@ mod tests {
|
|||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
plan_type: None,
|
plan_type: None,
|
||||||
resets_in_seconds: None,
|
resets_in_seconds: None,
|
||||||
|
rate_limits: Some(rate_limit_snapshot()),
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
@@ -307,6 +322,7 @@ mod tests {
|
|||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
plan_type: Some(PlanType::Known(KnownPlan::Team)),
|
plan_type: Some(PlanType::Known(KnownPlan::Team)),
|
||||||
resets_in_seconds: Some(3600),
|
resets_in_seconds: Some(3600),
|
||||||
|
rate_limits: Some(rate_limit_snapshot()),
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
@@ -319,6 +335,7 @@ mod tests {
|
|||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
plan_type: Some(PlanType::Known(KnownPlan::Business)),
|
plan_type: Some(PlanType::Known(KnownPlan::Business)),
|
||||||
resets_in_seconds: None,
|
resets_in_seconds: None,
|
||||||
|
rate_limits: Some(rate_limit_snapshot()),
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
@@ -331,6 +348,7 @@ mod tests {
|
|||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
plan_type: Some(PlanType::Known(KnownPlan::Pro)),
|
plan_type: Some(PlanType::Known(KnownPlan::Pro)),
|
||||||
resets_in_seconds: None,
|
resets_in_seconds: None,
|
||||||
|
rate_limits: Some(rate_limit_snapshot()),
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
@@ -343,6 +361,7 @@ mod tests {
|
|||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
plan_type: None,
|
plan_type: None,
|
||||||
resets_in_seconds: Some(5 * 60),
|
resets_in_seconds: Some(5 * 60),
|
||||||
|
rate_limits: Some(rate_limit_snapshot()),
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
@@ -355,6 +374,7 @@ mod tests {
|
|||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
plan_type: Some(PlanType::Known(KnownPlan::Plus)),
|
plan_type: Some(PlanType::Known(KnownPlan::Plus)),
|
||||||
resets_in_seconds: Some(3 * 3600 + 32 * 60),
|
resets_in_seconds: Some(3 * 3600 + 32 * 60),
|
||||||
|
rate_limits: Some(rate_limit_snapshot()),
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
@@ -367,6 +387,7 @@ mod tests {
|
|||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
plan_type: None,
|
plan_type: None,
|
||||||
resets_in_seconds: Some(2 * 86_400 + 3 * 3600 + 5 * 60),
|
resets_in_seconds: Some(2 * 86_400 + 3 * 3600 + 5 * 60),
|
||||||
|
rate_limits: Some(rate_limit_snapshot()),
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
@@ -379,6 +400,7 @@ mod tests {
|
|||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
plan_type: None,
|
plan_type: None,
|
||||||
resets_in_seconds: Some(30),
|
resets_in_seconds: Some(30),
|
||||||
|
rate_limits: Some(rate_limit_snapshot()),
|
||||||
};
|
};
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ use core_test_support::load_default_config_for_test;
|
|||||||
use core_test_support::load_sse_fixture_with_id;
|
use core_test_support::load_sse_fixture_with_id;
|
||||||
use core_test_support::non_sandbox_test;
|
use core_test_support::non_sandbox_test;
|
||||||
use core_test_support::responses;
|
use core_test_support::responses;
|
||||||
|
use core_test_support::test_codex::test_codex;
|
||||||
use core_test_support::wait_for_event;
|
use core_test_support::wait_for_event;
|
||||||
use futures::StreamExt;
|
use futures::StreamExt;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
@@ -797,7 +798,33 @@ async fn token_count_includes_rate_limits_snapshot() {
|
|||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let token_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
|
let first_token_event =
|
||||||
|
wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
|
||||||
|
let rate_limit_only = match first_token_event {
|
||||||
|
EventMsg::TokenCount(ev) => ev,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let rate_limit_json = serde_json::to_value(&rate_limit_only).unwrap();
|
||||||
|
pretty_assertions::assert_eq!(
|
||||||
|
rate_limit_json,
|
||||||
|
json!({
|
||||||
|
"info": null,
|
||||||
|
"rate_limits": {
|
||||||
|
"primary_used_percent": 12.5,
|
||||||
|
"secondary_used_percent": 40.0,
|
||||||
|
"primary_to_secondary_ratio_percent": 75.0,
|
||||||
|
"primary_window_minutes": 10,
|
||||||
|
"secondary_window_minutes": 60
|
||||||
|
}
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
let token_event = wait_for_event(
|
||||||
|
&codex,
|
||||||
|
|msg| matches!(msg, EventMsg::TokenCount(ev) if ev.info.is_some()),
|
||||||
|
)
|
||||||
|
.await;
|
||||||
let final_payload = match token_event {
|
let final_payload = match token_event {
|
||||||
EventMsg::TokenCount(ev) => ev,
|
EventMsg::TokenCount(ev) => ev,
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
@@ -846,6 +873,80 @@ async fn token_count_includes_rate_limits_snapshot() {
|
|||||||
wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;
|
wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn usage_limit_error_emits_rate_limit_event() -> anyhow::Result<()> {
|
||||||
|
let server = MockServer::start().await;
|
||||||
|
|
||||||
|
let response = ResponseTemplate::new(429)
|
||||||
|
.insert_header("x-codex-primary-used-percent", "100.0")
|
||||||
|
.insert_header("x-codex-secondary-used-percent", "87.5")
|
||||||
|
.insert_header("x-codex-primary-over-secondary-limit-percent", "95.0")
|
||||||
|
.insert_header("x-codex-primary-window-minutes", "15")
|
||||||
|
.insert_header("x-codex-secondary-window-minutes", "60")
|
||||||
|
.set_body_json(json!({
|
||||||
|
"error": {
|
||||||
|
"type": "usage_limit_reached",
|
||||||
|
"message": "limit reached",
|
||||||
|
"resets_in_seconds": 42,
|
||||||
|
"plan_type": "pro"
|
||||||
|
}
|
||||||
|
}));
|
||||||
|
|
||||||
|
Mock::given(method("POST"))
|
||||||
|
.and(path("/v1/responses"))
|
||||||
|
.respond_with(response)
|
||||||
|
.expect(1)
|
||||||
|
.mount(&server)
|
||||||
|
.await;
|
||||||
|
|
||||||
|
let mut builder = test_codex();
|
||||||
|
let codex_fixture = builder.build(&server).await?;
|
||||||
|
let codex = codex_fixture.codex.clone();
|
||||||
|
|
||||||
|
let expected_limits = json!({
|
||||||
|
"primary_used_percent": 100.0,
|
||||||
|
"secondary_used_percent": 87.5,
|
||||||
|
"primary_to_secondary_ratio_percent": 95.0,
|
||||||
|
"primary_window_minutes": 15,
|
||||||
|
"secondary_window_minutes": 60
|
||||||
|
});
|
||||||
|
|
||||||
|
let submission_id = codex
|
||||||
|
.submit(Op::UserInput {
|
||||||
|
items: vec![InputItem::Text {
|
||||||
|
text: "hello".into(),
|
||||||
|
}],
|
||||||
|
})
|
||||||
|
.await
|
||||||
|
.expect("submission should succeed while emitting usage limit error events");
|
||||||
|
|
||||||
|
let token_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
|
||||||
|
let EventMsg::TokenCount(event) = token_event else {
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
|
||||||
|
let event_json = serde_json::to_value(&event).expect("serialize token count event");
|
||||||
|
pretty_assertions::assert_eq!(
|
||||||
|
event_json,
|
||||||
|
json!({
|
||||||
|
"info": null,
|
||||||
|
"rate_limits": expected_limits
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
let error_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::Error(_))).await;
|
||||||
|
let EventMsg::Error(error_event) = error_event else {
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
assert!(
|
||||||
|
error_event.message.to_lowercase().contains("usage limit"),
|
||||||
|
"unexpected error message for submission {submission_id}: {}",
|
||||||
|
error_event.message
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
async fn azure_overrides_assign_properties_used_for_responses_url() {
|
async fn azure_overrides_assign_properties_used_for_responses_url() {
|
||||||
let existing_env_var_with_random_value = if cfg!(windows) { "USERNAME" } else { "USER" };
|
let existing_env_var_with_random_value = if cfg!(windows) { "USERNAME" } else { "USER" };
|
||||||
|
|||||||
@@ -592,11 +592,11 @@ impl TokenUsageInfo {
|
|||||||
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
|
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
|
||||||
pub struct TokenCountEvent {
|
pub struct TokenCountEvent {
|
||||||
pub info: Option<TokenUsageInfo>,
|
pub info: Option<TokenUsageInfo>,
|
||||||
pub rate_limits: Option<RateLimitSnapshotEvent>,
|
pub rate_limits: Option<RateLimitSnapshot>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
|
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
|
||||||
pub struct RateLimitSnapshotEvent {
|
pub struct RateLimitSnapshot {
|
||||||
/// Percentage (0-100) of the primary window that has been consumed.
|
/// Percentage (0-100) of the primary window that has been consumed.
|
||||||
pub primary_used_percent: f64,
|
pub primary_used_percent: f64,
|
||||||
/// Percentage (0-100) of the secondary window that has been consumed.
|
/// Percentage (0-100) of the secondary window that has been consumed.
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ use codex_core::protocol::McpToolCallBeginEvent;
|
|||||||
use codex_core::protocol::McpToolCallEndEvent;
|
use codex_core::protocol::McpToolCallEndEvent;
|
||||||
use codex_core::protocol::Op;
|
use codex_core::protocol::Op;
|
||||||
use codex_core::protocol::PatchApplyBeginEvent;
|
use codex_core::protocol::PatchApplyBeginEvent;
|
||||||
use codex_core::protocol::RateLimitSnapshotEvent;
|
use codex_core::protocol::RateLimitSnapshot;
|
||||||
use codex_core::protocol::ReviewRequest;
|
use codex_core::protocol::ReviewRequest;
|
||||||
use codex_core::protocol::StreamErrorEvent;
|
use codex_core::protocol::StreamErrorEvent;
|
||||||
use codex_core::protocol::TaskCompleteEvent;
|
use codex_core::protocol::TaskCompleteEvent;
|
||||||
@@ -132,6 +132,10 @@ impl RateLimitWarningState {
|
|||||||
secondary_used_percent: f64,
|
secondary_used_percent: f64,
|
||||||
primary_used_percent: f64,
|
primary_used_percent: f64,
|
||||||
) -> Vec<String> {
|
) -> Vec<String> {
|
||||||
|
if secondary_used_percent == 100.0 || primary_used_percent == 100.0 {
|
||||||
|
return Vec::new();
|
||||||
|
}
|
||||||
|
|
||||||
let mut warnings = Vec::new();
|
let mut warnings = Vec::new();
|
||||||
|
|
||||||
let mut highest_secondary: Option<f64> = None;
|
let mut highest_secondary: Option<f64> = None;
|
||||||
@@ -185,7 +189,7 @@ pub(crate) struct ChatWidget {
|
|||||||
session_header: SessionHeader,
|
session_header: SessionHeader,
|
||||||
initial_user_message: Option<UserMessage>,
|
initial_user_message: Option<UserMessage>,
|
||||||
token_info: Option<TokenUsageInfo>,
|
token_info: Option<TokenUsageInfo>,
|
||||||
rate_limit_snapshot: Option<RateLimitSnapshotEvent>,
|
rate_limit_snapshot: Option<RateLimitSnapshot>,
|
||||||
rate_limit_warnings: RateLimitWarningState,
|
rate_limit_warnings: RateLimitWarningState,
|
||||||
// Stream lifecycle controller
|
// Stream lifecycle controller
|
||||||
stream_controller: Option<StreamController>,
|
stream_controller: Option<StreamController>,
|
||||||
@@ -353,11 +357,13 @@ impl ChatWidget {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn set_token_info(&mut self, info: Option<TokenUsageInfo>) {
|
pub(crate) fn set_token_info(&mut self, info: Option<TokenUsageInfo>) {
|
||||||
self.bottom_pane.set_token_usage(info.clone());
|
if info.is_some() {
|
||||||
self.token_info = info;
|
self.bottom_pane.set_token_usage(info.clone());
|
||||||
|
self.token_info = info;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn on_rate_limit_snapshot(&mut self, snapshot: Option<RateLimitSnapshotEvent>) {
|
fn on_rate_limit_snapshot(&mut self, snapshot: Option<RateLimitSnapshot>) {
|
||||||
if let Some(snapshot) = snapshot {
|
if let Some(snapshot) = snapshot {
|
||||||
let warnings = self.rate_limit_warnings.take_warnings(
|
let warnings = self.rate_limit_warnings.take_warnings(
|
||||||
snapshot.secondary_used_percent,
|
snapshot.secondary_used_percent,
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ use codex_core::plan_tool::UpdatePlanArgs;
|
|||||||
use codex_core::project_doc::discover_project_doc_paths;
|
use codex_core::project_doc::discover_project_doc_paths;
|
||||||
use codex_core::protocol::FileChange;
|
use codex_core::protocol::FileChange;
|
||||||
use codex_core::protocol::McpInvocation;
|
use codex_core::protocol::McpInvocation;
|
||||||
use codex_core::protocol::RateLimitSnapshotEvent;
|
use codex_core::protocol::RateLimitSnapshot;
|
||||||
use codex_core::protocol::SandboxPolicy;
|
use codex_core::protocol::SandboxPolicy;
|
||||||
use codex_core::protocol::SessionConfiguredEvent;
|
use codex_core::protocol::SessionConfiguredEvent;
|
||||||
use codex_core::protocol::TokenUsage;
|
use codex_core::protocol::TokenUsage;
|
||||||
@@ -1082,7 +1082,7 @@ pub(crate) fn new_status_output(
|
|||||||
config: &Config,
|
config: &Config,
|
||||||
usage: &TokenUsage,
|
usage: &TokenUsage,
|
||||||
session_id: &Option<ConversationId>,
|
session_id: &Option<ConversationId>,
|
||||||
rate_limits: Option<&RateLimitSnapshotEvent>,
|
rate_limits: Option<&RateLimitSnapshot>,
|
||||||
) -> PlainHistoryCell {
|
) -> PlainHistoryCell {
|
||||||
let mut lines: Vec<Line<'static>> = Vec::new();
|
let mut lines: Vec<Line<'static>> = Vec::new();
|
||||||
lines.push("/status".magenta().into());
|
lines.push("/status".magenta().into());
|
||||||
@@ -1611,7 +1611,7 @@ fn format_mcp_invocation<'a>(invocation: McpInvocation) -> Line<'a> {
|
|||||||
invocation_spans.into()
|
invocation_spans.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_status_limit_lines(snapshot: Option<&RateLimitSnapshotEvent>) -> Vec<Line<'static>> {
|
fn build_status_limit_lines(snapshot: Option<&RateLimitSnapshot>) -> Vec<Line<'static>> {
|
||||||
let mut lines: Vec<Line<'static>> =
|
let mut lines: Vec<Line<'static>> =
|
||||||
vec![vec![padded_emoji("⏱️").into(), "Usage Limits".bold()].into()];
|
vec![vec![padded_emoji("⏱️").into(), "Usage Limits".bold()].into()];
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user