Auto compact at ~90% (#5292)

Users currently hit a context-window-exceeded error and usually don't know what
to do. This change starts auto compact at ~90% of the window.
Ahmed Ibrahim
2025-10-20 11:29:49 -07:00
committed by GitHub
parent cda6db6ccf
commit 049a61bcfc
21 changed files with 236 additions and 110 deletions

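The new behavior isn't spelled out in prose, so here is a rough sketch of the check the tests below exercise. The config fields model_context_window and model_auto_compact_token_limit appear in the diff; the helper name and the ~90% default are illustrative, taken from the commit title, not from the actual implementation.

// Minimal sketch, assuming the limit defaults to ~90% of the context window.
fn should_auto_compact(
    total_tokens: i64,
    model_context_window: i64,
    auto_compact_token_limit: Option<i64>,
) -> bool {
    // Fall back to ~90% of the window when no explicit limit is configured.
    let limit = auto_compact_token_limit.unwrap_or(model_context_window * 90 / 100);
    total_tokens > limit
}

With a 100-token window this yields a limit of 90 tokens, which matches the constants used in the new test at the bottom of the diff.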
View File

@@ -138,7 +138,7 @@ pub fn ev_response_created(id: &str) -> Value {
})
}
-pub fn ev_completed_with_tokens(id: &str, total_tokens: u64) -> Value {
+pub fn ev_completed_with_tokens(id: &str, total_tokens: i64) -> Value {
serde_json::json!({
"type": "response.completed",
"response": {

View File

@@ -858,8 +858,8 @@ async fn token_count_includes_rate_limits_snapshot() {
"reasoning_output_tokens": 0,
"total_tokens": 123
},
-// Default model is gpt-5-codex in tests → 272000 context window
-"model_context_window": 272000
+// Default model is gpt-5-codex in tests → 95% usable context window
+"model_context_window": 258400
},
"rate_limits": {
"primary": {
@@ -985,6 +985,8 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
+const EFFECTIVE_CONTEXT_WINDOW: i64 = (272_000 * 95) / 100;
responses::mount_sse_once_match(
&server,
body_string_contains("trigger context window"),
@@ -1056,8 +1058,11 @@ async fn context_window_error_sets_total_tokens_to_model_window() -> anyhow::Res
.info
.expect("token usage info present when context window is exceeded");
-assert_eq!(info.model_context_window, Some(272_000));
-assert_eq!(info.total_token_usage.total_tokens, 272_000);
+assert_eq!(info.model_context_window, Some(EFFECTIVE_CONTEXT_WINDOW));
+assert_eq!(
+    info.total_token_usage.total_tokens,
+    EFFECTIVE_CONTEXT_WINDOW
+);
let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
let expected_context_window_message = CodexErr::ContextWindowExceeded.to_string();
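For reference, the snapshot value and the new constant agree: (272_000 * 95) / 100 = 258_400, i.e. the tests now treat 95% of the raw 272_000-token gpt-5-codex window as the usable context window.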

View File

@@ -19,6 +19,7 @@ use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_completed_with_tokens;
use core_test_support::responses::ev_function_call;
+use core_test_support::responses::mount_sse_once;
use core_test_support::responses::mount_sse_once_match;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
@@ -43,6 +44,7 @@ const CONTEXT_LIMIT_MESSAGE: &str =
"Your input exceeds the context window of this model. Please adjust your input and try again.";
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
const DUMMY_CALL_ID: &str = "call-multi-auto";
+const FUNCTION_CALL_LIMIT_MSG: &str = "function call limit push";
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn summarize_context_three_requests_and_instructions() {
@@ -860,3 +862,97 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
"second auto compact request should include the summarization prompt"
);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
skip_if_no_network!();
let server = start_mock_server().await;
let context_window = 100;
let limit = context_window * 90 / 100;
let over_limit_tokens = context_window * 95 / 100 + 1;
let first_turn = sse(vec![
ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
ev_completed_with_tokens("r1", 50),
]);
let function_call_follow_up = sse(vec![
ev_assistant_message("m2", FINAL_REPLY),
ev_completed_with_tokens("r2", over_limit_tokens),
]);
let auto_compact_turn = sse(vec![
ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
ev_completed_with_tokens("r3", 10),
]);
let post_auto_compact_turn = sse(vec![ev_completed_with_tokens("r4", 10)]);
// Mount responses in order and keep mocks only for the ones we assert on.
let first_turn_mock = mount_sse_once(&server, first_turn).await;
let follow_up_mock = mount_sse_once(&server, function_call_follow_up).await;
let auto_compact_mock = mount_sse_once(&server, auto_compact_turn).await;
// We don't assert on the post-compact request, so no need to keep its mock.
mount_sse_once(&server, post_auto_compact_turn).await;
let model_provider = ModelProviderInfo {
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
config.model_context_window = Some(context_window);
config.model_auto_compact_token_limit = Some(limit);
let codex = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"))
.new_conversation(config)
.await
.unwrap()
.conversation;
codex
.submit(Op::UserInput {
items: vec![InputItem::Text {
text: FUNCTION_CALL_LIMIT_MSG.into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |msg| matches!(msg, EventMsg::TaskComplete(_))).await;
// Assert first request captured expected user message that triggers function call.
let first_request = first_turn_mock.single_request().input();
assert!(
first_request.iter().any(|item| {
item.get("type").and_then(|value| value.as_str()) == Some("message")
&& item
.get("content")
.and_then(|content| content.as_array())
.and_then(|entries| entries.first())
.and_then(|entry| entry.get("text"))
.and_then(|value| value.as_str())
== Some(FUNCTION_CALL_LIMIT_MSG)
}),
"first request should include the user message that triggers the function call"
);
let function_call_output = follow_up_mock
.single_request()
.function_call_output(DUMMY_CALL_ID);
let output_text = function_call_output
.get("output")
.and_then(|value| value.as_str())
.unwrap_or_default();
assert!(
output_text.contains(DUMMY_FUNCTION_NAME),
"function call output should be sent before auto compact"
);
let auto_compact_body = auto_compact_mock.single_request().body_json().to_string();
assert!(
auto_compact_body.contains("You have exceeded the maximum number of tokens"),
"auto compact request should include the summarization prompt after exceeding 95% (limit {limit})"
);
}
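With context_window = 100, the constants above work out to limit = 100 * 90 / 100 = 90 and over_limit_tokens = 100 * 95 / 100 + 1 = 96, so the second turn's 96-token usage crosses the configured auto-compact limit and forces the summarization request that the final assertion checks for.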