Auto compact at ~90% (#5292)
Users now hit a window exceeded limit and they usually don't know what to do. This starts auto compact at ~90% of the window.
This commit is contained in:
@@ -138,7 +138,7 @@ pub fn ev_response_created(id: &str) -> Value {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn ev_completed_with_tokens(id: &str, total_tokens: u64) -> Value {
|
||||
pub fn ev_completed_with_tokens(id: &str, total_tokens: i64) -> Value {
|
||||
serde_json::json!({
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
|
||||
@@ -858,8 +858,8 @@ async fn token_count_includes_rate_limits_snapshot() {
|
||||
"reasoning_output_tokens": 0,
|
||||
"total_tokens": 123
|
||||
},
|
||||
// Default model is gpt-5-codex in tests → 272000 context window
|
||||
"model_context_window": 272000
|
||||
// Default model is gpt-5-codex in tests → 95% usable context window
|
||||
"model_context_window": 258400
|
||||
},
|
||||
"rate_limits": {
|
||||
"primary": {
|
||||
@@ -985,6 +985,8 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
|
||||
skip_if_no_network!(Ok(()));
|
||||
let server = MockServer::start().await;
|
||||
|
||||
const EFFECTIVE_CONTEXT_WINDOW: i64 = (272_000 * 95) / 100;
|
||||
|
||||
responses::mount_sse_once_match(
|
||||
&server,
|
||||
body_string_contains("trigger context window"),
|
||||
@@ -1056,8 +1058,11 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
|
||||
.info
|
||||
.expect("token usage info present when context window is exceeded");
|
||||
|
||||
assert_eq!(info.model_context_window, Some(272_000));
|
||||
assert_eq!(info.total_token_usage.total_tokens, 272_000);
|
||||
assert_eq!(info.model_context_window, Some(EFFECTIVE_CONTEXT_WINDOW));
|
||||
assert_eq!(
|
||||
info.total_token_usage.total_tokens,
|
||||
EFFECTIVE_CONTEXT_WINDOW
|
||||
);
|
||||
|
||||
let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
|
||||
let expected_context_window_message = CodexErr::ContextWindowExceeded.to_string();
|
||||
|
||||
@@ -19,6 +19,7 @@ use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_completed_with_tokens;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::mount_sse_once;
|
||||
use core_test_support::responses::mount_sse_once_match;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
@@ -43,6 +44,7 @@ const CONTEXT_LIMIT_MESSAGE: &str =
|
||||
"Your input exceeds the context window of this model. Please adjust your input and try again.";
|
||||
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
|
||||
const DUMMY_CALL_ID: &str = "call-multi-auto";
|
||||
const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn summarize_context_three_requests_and_instructions() {
|
||||
@@ -860,3 +862,97 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
|
||||
"second auto compact request should include the summarization prompt"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
|
||||
skip_if_no_network!();
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let context_window = 100;
|
||||
let limit = context_window * 90 / 100;
|
||||
let over_limit_tokens = context_window * 95 / 100 + 1;
|
||||
|
||||
let first_turn = sse(vec![
|
||||
ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
|
||||
ev_completed_with_tokens("r1", 50),
|
||||
]);
|
||||
let function_call_follow_up = sse(vec![
|
||||
ev_assistant_message("m2", FINAL_REPLY),
|
||||
ev_completed_with_tokens("r2", over_limit_tokens),
|
||||
]);
|
||||
let auto_compact_turn = sse(vec![
|
||||
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
|
||||
ev_completed_with_tokens("r3", 10),
|
||||
]);
|
||||
let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 10)]);
|
||||
|
||||
// Mount responses in order and keep mocks only for the ones we assert on.
|
||||
let first_turn_mock = mount_sse_once(&server, first_turn).await;
|
||||
let follow_up_mock = mount_sse_once(&server, function_call_follow_up).await;
|
||||
let auto_compact_mock = mount_sse_once(&server, auto_compact_turn).await;
|
||||
// We don't assert on the post-compact request, so no need to keep its mock.
|
||||
mount_sse_once(&server, post_auto_compact_turn).await;
|
||||
|
||||
let model_provider = ModelProviderInfo {
|
||||
base_url: Some(format!("{}/v1", server.uri())),
|
||||
..built_in_model_providers()["openai"].clone()
|
||||
};
|
||||
|
||||
let home = TempDir::new().unwrap();
|
||||
let mut config = load_default_config_for_test(&home);
|
||||
config.model_provider = model_provider;
|
||||
config.model_context_window = Some(context_window);
|
||||
config.model_auto_compact_token_limit = Some(limit);
|
||||
|
||||
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
|
||||
.new_conversation(config)
|
||||
.await
|
||||
.unwrap()
|
||||
.conversation;
|
||||
|
||||
codex
|
||||
.submit(Op::UserInput {
|
||||
items: vec![InputItem::Text {
|
||||
text: FUNCTION_CALL_LIMIT_MSG.into(),
|
||||
}],
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
// Assert first request captured expected user message that triggers function call.
|
||||
let first_request = first_turn_mock.single_request().input();
|
||||
assert!(
|
||||
first_request.iter().any(|item| {
|
||||
item.get("type").and_then(|value| value.as_str()) == Some("message")
|
||||
&& item
|
||||
.get("content")
|
||||
.and_then(|content| content.as_array())
|
||||
.and_then(|entries| entries.first())
|
||||
.and_then(|entry| entry.get("text"))
|
||||
.and_then(|value| value.as_str())
|
||||
== Some(FUNCTION_CALL_LIMIT_MSG)
|
||||
}),
|
||||
"first request should include the user message that triggers the function call"
|
||||
);
|
||||
|
||||
let function_call_output = follow_up_mock
|
||||
.single_request()
|
||||
.function_call_output(DUMMY_CALL_ID);
|
||||
let output_text = function_call_output
|
||||
.get("output")
|
||||
.and_then(|value| value.as_str())
|
||||
.unwrap_or_default();
|
||||
assert!(
|
||||
output_text.contains(DUMMY_FUNCTION_NAME),
|
||||
"function call output should be sent before auto compact"
|
||||
);
|
||||
|
||||
let auto_compact_body = auto_compact_mock.single_request().body_json().to_string();
|
||||
assert!(
|
||||
auto_compact_body.contains("You have exceeded the maximum number of tokens"),
|
||||
"auto compact request should include the summarization prompt after exceeding 95% (limit {limit})"
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user