Send limits when getting rate limited (#4102)

Users need visibility on rate limits when they are rate limited.
This commit is contained in:
Ahmed Ibrahim
2025-09-23 15:56:34 -07:00
committed by GitHub
parent fdb8dadcae
commit 8227a5ba1b
8 changed files with 186 additions and 46 deletions

View File

@@ -23,6 +23,7 @@ use core_test_support::load_default_config_for_test;
use core_test_support::load_sse_fixture_with_id;
use core_test_support::non_sandbox_test;
use core_test_support::responses;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use futures::StreamExt;
use serde_json::json;
@@ -797,7 +798,33 @@ async fn token_count_includes_rate_limits_snapshot() {
.await
.unwrap();
let token_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
let first_token_event =
wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
let rate_limit_only = match first_token_event {
EventMsg::TokenCount(ev) => ev,
_ => unreachable!(),
};
let rate_limit_json = serde_json::to_value(&rate_limit_only).unwrap();
pretty_assertions::assert_eq!(
rate_limit_json,
json!({
"info": null,
"rate_limits": {
"primary_used_percent": 12.5,
"secondary_used_percent": 40.0,
"primary_to_secondary_ratio_percent": 75.0,
"primary_window_minutes": 10,
"secondary_window_minutes": 60
}
})
);
let token_event = wait_for_event(
&codex,
|msg| matches!(msg, EventMsg::TokenCount(ev) if ev.info.is_some()),
)
.await;
let final_payload = match token_event {
EventMsg::TokenCount(ev) => ev,
_ => unreachable!(),
@@ -846,6 +873,80 @@ async fn token_count_includes_rate_limits_snapshot() {
wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn usage_limit_error_emits_rate_limit_event() -> anyhow::Result<()> {
let server = MockServer::start().await;
let response = ResponseTemplate::new(429)
.insert_header("x-codex-primary-used-percent", "100.0")
.insert_header("x-codex-secondary-used-percent", "87.5")
.insert_header("x-codex-primary-over-secondary-limit-percent", "95.0")
.insert_header("x-codex-primary-window-minutes", "15")
.insert_header("x-codex-secondary-window-minutes", "60")
.set_body_json(json!({
"error": {
"type": "usage_limit_reached",
"message": "limit reached",
"resets_in_seconds": 42,
"plan_type": "pro"
}
}));
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(response)
.expect(1)
.mount(&server)
.await;
let mut builder = test_codex();
let codex_fixture = builder.build(&server).await?;
let codex = codex_fixture.codex.clone();
let expected_limits = json!({
"primary_used_percent": 100.0,
"secondary_used_percent": 87.5,
"primary_to_secondary_ratio_percent": 95.0,
"primary_window_minutes": 15,
"secondary_window_minutes": 60
});
let submission_id = codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: "hello".into(),
}],
})
.await
.expect("submission should succeed while emitting usage limit error events");
let token_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::TokenCount(_))).await;
let EventMsg::TokenCount(event) = token_event else {
unreachable!();
};
let event_json = serde_json::to_value(&event).expect("serialize token count event");
pretty_assertions::assert_eq!(
event_json,
json!({
"info": null,
"rate_limits": expected_limits
})
);
let error_event = wait_for_event(&codex, |msg| matches!(msg, EventMsg::Error(_))).await;
let EventMsg::Error(error_event) = error_event else {
unreachable!();
};
assert!(
error_event.message.to_lowercase().contains("usage limit"),
"unexpected error message for submission {submission_id}: {}",
error_event.message
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn azure_overrides_assign_properties_used_for_responses_url() {
let existing_env_var_with_random_value = if cfg!(windows) { "USERNAME" } else { "USER" };