codex-rs/core/tests/suite/compact.rs

#![expect(clippy::unwrap_used)]

use codex_core::CodexAuth;
use codex_core::ConversationManager;
use codex_core::ModelProviderInfo;
use codex_core::NewConversation;
use codex_core::built_in_model_providers;
use codex_core::protocol::ErrorEvent;
use codex_core::protocol::EventMsg;
use codex_core::protocol::InputItem;
use codex_core::protocol::Op;
use codex_core::protocol::RolloutItem;
use codex_core::protocol::RolloutLine;
use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
use core_test_support::load_default_config_for_test;
use core_test_support::wait_for_event;
use serde_json::Value;
use tempfile::TempDir;
use wiremock::BodyPrintLimit;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::Request;
use wiremock::Respond;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;

use pretty_assertions::assert_eq;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;

// --- Test helpers -----------------------------------------------------------

/// Build an SSE stream body from a list of JSON events.
fn sse(events: Vec<Value>) -> String {
    use std::fmt::Write as _;
    let mut out = String::new();
    for ev in events {
        let kind = ev.get("type").and_then(|v| v.as_str()).unwrap();
        writeln!(&mut out, "event: {kind}").unwrap();
        if !ev.as_object().map(|o| o.len() == 1).unwrap_or(false) {
            write!(&mut out, "data: {ev}\n\n").unwrap();
        } else {
            out.push('\n');
        }
    }
    out
}

/// SSE `response.completed` event for the response `id`, reporting zero
/// token usage.
fn ev_completed(id: &str) -> Value {
    let usage = serde_json::json!({
        "input_tokens": 0,
        "input_tokens_details": null,
        "output_tokens": 0,
        "output_tokens_details": null,
        "total_tokens": 0
    });
    let response = serde_json::json!({ "id": id, "usage": usage });
    serde_json::json!({ "type": "response.completed", "response": response })
}

/// SSE `response.completed` event for the response `id` whose usage reports
/// `total_tokens` as both the input and the total token count (output is 0).
fn ev_completed_with_tokens(id: &str, total_tokens: u64) -> Value {
    let usage = serde_json::json!({
        "input_tokens": total_tokens,
        "input_tokens_details": null,
        "output_tokens": 0,
        "output_tokens_details": null,
        "total_tokens": total_tokens
    });
    let response = serde_json::json!({ "id": id, "usage": usage });
    serde_json::json!({ "type": "response.completed", "response": response })
}

/// SSE `response.output_item.done` event carrying one assistant message with
/// the given item `id` and a single `output_text` content part holding `text`.
fn ev_assistant_message(id: &str, text: &str) -> Value {
    let content = serde_json::json!([{ "type": "output_text", "text": text }]);
    let item = serde_json::json!({
        "type": "message",
        "role": "assistant",
        "id": id,
        "content": content
    });
    serde_json::json!({ "type": "response.output_item.done", "item": item })
}

/// SSE `response.output_item.done` event carrying a `function_call` item with
/// the given call id, function name, and raw `arguments` string.
fn ev_function_call(call_id: &str, name: &str, arguments: &str) -> Value {
    let item = serde_json::json!({
        "type": "function_call",
        "call_id": call_id,
        "name": name,
        "arguments": arguments
    });
    serde_json::json!({ "type": "response.output_item.done", "item": item })
}

/// Wrap an SSE `body` in a `200` response served as `text/event-stream`.
fn sse_response(body: String) -> ResponseTemplate {
    const EVENT_STREAM: &str = "text/event-stream";

    let template = ResponseTemplate::new(200).insert_header("content-type", EVENT_STREAM);
    template.set_body_raw(body, EVENT_STREAM)
}

/// Mount a mock on `server` that answers `POST /v1/responses` requests
/// accepted by `matcher` with the SSE `body`, with an expectation of one
/// matching request (`.expect(1)`).
async fn mount_sse_once<M>(server: &MockServer, matcher: M, body: String)
where
    M: wiremock::Match + Send + Sync + 'static,
{
    let responses_endpoint = Mock::given(method("POST")).and(path("/v1/responses"));
    let mock = responses_endpoint
        .and(matcher)
        .respond_with(sse_response(body))
        .expect(1);
    mock.mount(server).await;
}

/// Start a wiremock server with its body print limit set to
/// `BodyPrintLimit::Limited(80_000)`.
async fn start_mock_server() -> MockServer {
    let builder = MockServer::builder().body_print_limit(BodyPrintLimit::Limited(80_000));
    builder.start().await
}

// Fixed strings shared by the tests below: canned assistant replies, user
// inputs, and the markers that request matchers and assertions search for.

// Assistant reply returned for the first ordinary turn.
const FIRST_REPLY: &str = "FIRST_REPLY";
// Assistant message returned by the mocked summarization request.
const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT";
// Text the tests expect inside the user message of a summarization request.
const SUMMARIZE_TRIGGER: &str = "Start Summarization";
// User input submitted after the manual compaction.
const THIRD_USER_MSG: &str = "next turn";
// Assistant message returned by the mocked auto-compaction request.
const AUTO_SUMMARY_TEXT: &str = "AUTO_SUMMARY";
// User inputs for the first and second turns of the auto-compaction tests.
const FIRST_AUTO_MSG: &str = "token limit start";
const SECOND_AUTO_MSG: &str = "token limit push";
// Assistant reply for the follow-up turn that again reports a large token count.
const STILL_TOO_BIG_REPLY: &str = "STILL_TOO_BIG";
// User input and canned replies for the multi-compaction test.
const MULTI_AUTO_MSG: &str = "multi auto";
const SECOND_LARGE_REPLY: &str = "SECOND_LARGE_REPLY";
const FIRST_AUTO_SUMMARY: &str = "FIRST_AUTO_SUMMARY";
const SECOND_AUTO_SUMMARY: &str = "SECOND_AUTO_SUMMARY";
const FINAL_REPLY: &str = "FINAL_REPLY";
// Name and call id of the function call emitted by the mocked model in the
// multi-compaction test.
const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
const DUMMY_CALL_ID: &str = "call-multi-auto";

/// Exercises a manual `Op::Compact` between two ordinary user turns.
///
/// The mock server receives three requests: the first user turn, the
/// summarization request, and a follow-up user turn. The test checks that
///
/// - the summarization request uses different instructions and ends with a
///   user message containing the summarize trigger;
/// - the follow-up request contains no assistant history and no trigger text,
///   but does contain a bridge message (with the earlier user message and the
///   summary) plus the new user message;
/// - the rollout file records three `TurnContext` entries and a `Compacted`
///   entry holding the summarizer output.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn summarize_context_three_requests_and_instructions() {
    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
        return;
    }

    // Set up a mock server that we can inspect after the run.
    let server = start_mock_server().await;

    // SSE 1: assistant replies normally so it is recorded in history.
    let sse1 = sse(vec![
        ev_assistant_message("m1", FIRST_REPLY),
        ev_completed("r1"),
    ]);

    // SSE 2: summarizer returns a summary message.
    let sse2 = sse(vec![
        ev_assistant_message("m2", SUMMARY_TEXT),
        ev_completed("r2"),
    ]);

    // SSE 3: minimal completed; we only need to capture the request body.
    let sse3 = sse(vec![ev_completed("r3")]);

    // Mount three expectations, one per request, matched by body content.
    let first_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("\"text\":\"hello world\"")
            && !body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
    };
    mount_sse_once(&server, first_matcher, sse1).await;

    let second_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
    };
    mount_sse_once(&server, second_matcher, sse2).await;

    let third_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains(&format!("\"text\":\"{THIRD_USER_MSG}\""))
    };
    mount_sse_once(&server, third_matcher, sse3).await;

    // Build config pointing to the mock server and spawn Codex.
    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers()["openai"].clone()
    };
    let home = TempDir::new().unwrap();
    let mut config = load_default_config_for_test(&home);
    config.model_provider = model_provider;
    config.model_auto_compact_token_limit = Some(200_000);
    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
    let NewConversation {
        conversation: codex,
        session_configured,
        ..
    } = conversation_manager.new_conversation(config).await.unwrap();
    let rollout_path = session_configured.rollout_path;

    // 1) Normal user input – should hit server once.
    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
                text: "hello world".into(),
            }],
        })
        .await
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    // 2) Summarize – second hit with summarization instructions.
    codex.submit(Op::Compact).await.unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    // 3) Next user input – third hit; history should include only the summary.
    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
                text: THIRD_USER_MSG.into(),
            }],
        })
        .await
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    // Inspect the three captured requests.
    let requests = server.received_requests().await.unwrap();
    assert_eq!(requests.len(), 3, "expected exactly three requests");

    let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
    let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
    let body3 = requests[2].body_json::<serde_json::Value>().unwrap();

    // System instructions should change for the summarization turn.
    let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();
    let instr2 = body2.get("instructions").and_then(|v| v.as_str()).unwrap();
    assert_ne!(
        instr1, instr2,
        "summarization should override base instructions"
    );
    assert!(
        instr2.contains("You have exceeded the maximum number of tokens"),
        "summarization instructions not applied"
    );

    // The summarization request should include the injected user input marker.
    let input2 = body2.get("input").and_then(|v| v.as_array()).unwrap();
    // The last item is the user message created from the injected input.
    let last2 = input2.last().unwrap();
    assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
    assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
    let text2 = last2["content"][0]["text"].as_str().unwrap();
    assert!(
        text2.contains(SUMMARIZE_TRIGGER),
        "expected summarize trigger, got `{text2}`"
    );

    // Third request must contain the refreshed instructions, bridge summary message and new user msg.
    let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
    println!("third request body: {body3}");
    assert!(
        input3.len() >= 3,
        "expected refreshed context and new user message in third request"
    );

    // Collect all (role, text) message tuples; only the first content part of
    // each message is inspected.
    let messages: Vec<(String, String)> = input3
        .iter()
        .filter(|item| item["type"].as_str() == Some("message"))
        .map(|item| {
            let role = item["role"].as_str().unwrap_or_default().to_string();
            let text = item["content"][0]["text"]
                .as_str()
                .unwrap_or_default()
                .to_string();
            (role, text)
        })
        .collect();

    // No previous assistant messages should remain and the new user message is present.
    let assistant_count = messages.iter().filter(|(r, _)| r == "assistant").count();
    assert_eq!(assistant_count, 0, "assistant history should be cleared");
    assert!(
        messages
            .iter()
            .any(|(r, t)| r == "user" && t == THIRD_USER_MSG),
        "third request should include the new user message"
    );
    let Some((_, bridge_text)) = messages.iter().find(|(role, text)| {
        role == "user"
            && (text.contains("Here were the user messages")
                || text.contains("Here are all the user messages"))
            && text.contains(SUMMARY_TEXT)
    }) else {
        panic!("expected a bridge message containing the summary");
    };
    assert!(
        bridge_text.contains("hello world"),
        "bridge should capture earlier user messages"
    );
    assert!(
        !bridge_text.contains(SUMMARIZE_TRIGGER),
        "bridge text should not echo the summarize trigger"
    );
    assert!(
        !messages
            .iter()
            .any(|(_, text)| text.contains(SUMMARIZE_TRIGGER)),
        "third request should not include the summarize trigger"
    );

    // Shut down Codex to flush rollout entries before inspecting the file.
    codex.submit(Op::Shutdown).await.unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await;

    // Verify the rollout contains one TurnContext entry per API call and a
    // Compacted entry holding the summary.
    println!("rollout path: {}", rollout_path.display());
    let text = std::fs::read_to_string(&rollout_path).unwrap_or_else(|e| {
        panic!(
            "failed to read rollout file {}: {e}",
            rollout_path.display()
        )
    });
    let mut turn_context_count = 0usize;
    let mut saw_compacted_summary = false;
    for line in text.lines().map(str::trim).filter(|line| !line.is_empty()) {
        // Lines that do not parse as a rollout entry are skipped.
        let Ok(entry) = serde_json::from_str::<RolloutLine>(line) else {
            continue;
        };
        match entry.item {
            RolloutItem::TurnContext(_) => {
                turn_context_count += 1;
            }
            RolloutItem::Compacted(ci) => {
                if ci.message == SUMMARY_TEXT {
                    saw_compacted_summary = true;
                }
            }
            _ => {}
        }
    }

    // `assert_eq!` reports the actual count when the expectation fails.
    assert_eq!(
        turn_context_count, 3,
        "expected three TurnContext entries in rollout"
    );
    assert!(
        saw_compacted_summary,
        "expected a Compacted entry containing the summarizer output"
    );
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_runs_after_token_limit_hit() {
    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
        return;
    }

    let server = start_mock_server().await;

    let sse1 = sse(vec![
        ev_assistant_message("m1", FIRST_REPLY),
        ev_completed_with_tokens("r1", 70_000),
    ]);

    let sse2 = sse(vec![
        ev_assistant_message("m2", "SECOND_REPLY"),
        ev_completed_with_tokens("r2", 330_000),
    ]);

    let sse3 = sse(vec![
        ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
        ev_completed_with_tokens("r3", 200),
    ]);

    let first_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains(FIRST_AUTO_MSG)
            && !body.contains(SECOND_AUTO_MSG)
            && !body.contains("You have exceeded the maximum number of tokens")
    };
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .and(first_matcher)
        .respond_with(sse_response(sse1))
        .mount(&server)
        .await;

    let second_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains(SECOND_AUTO_MSG)
            && body.contains(FIRST_AUTO_MSG)
            && !body.contains("You have exceeded the maximum number of tokens")
    };
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .and(second_matcher)
        .respond_with(sse_response(sse2))
        .mount(&server)
        .await;

    let third_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("You have exceeded the maximum number of tokens")
    };
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .and(third_matcher)
        .respond_with(sse_response(sse3))
        .mount(&server)
        .await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers()["openai"].clone()
    };

    let home = TempDir::new().unwrap();
    let mut config = load_default_config_for_test(&home);
    config.model_provider = model_provider;
    config.model_auto_compact_token_limit = Some(200_000);
    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
    let codex = conversation_manager
        .new_conversation(config)
        .await
        .unwrap()
        .conversation;

    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
                text: FIRST_AUTO_MSG.into(),
            }],
        })
        .await
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
                text: SECOND_AUTO_MSG.into(),
            }],
        })
        .await
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
    // wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    let requests = server.received_requests().await.unwrap();
    assert_eq!(requests.len(), 3, "auto compact should add a third request");

    let body3 = requests[2].body_json::<serde_json::Value>().unwrap();
    let instructions = body3
        .get("instructions")
        .and_then(|v| v.as_str())
        .unwrap_or_default();
    assert!(
        instructions.contains("You have exceeded the maximum number of tokens"),
        "auto compact should reuse summarization instructions"
    );
}

/// Runs two user turns against the mock server, shuts the conversation down,
/// and checks that the rollout file contains at least two `TurnContext`
/// entries.
///
/// NOTE(review): unlike `auto_compact_runs_after_token_limit_hit`, this test
/// never sets `config.model_auto_compact_token_limit`, so whether the third
/// (summarization) mock is actually hit presumably depends on the default
/// configuration — confirm this is intended. The test also makes no assertion
/// about `Compacted` entries.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_persists_rollout_entries() {
    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
        return;
    }

    let server = start_mock_server().await;

    // Turn 1: small reply reporting 70_000 tokens.
    let sse1 = sse(vec![
        ev_assistant_message("m1", FIRST_REPLY),
        ev_completed_with_tokens("r1", 70_000),
    ]);

    // Turn 2: reply reporting 330_000 tokens.
    let sse2 = sse(vec![
        ev_assistant_message("m2", "SECOND_REPLY"),
        ev_completed_with_tokens("r2", 330_000),
    ]);

    // Reply served to a summarization request, if one is made.
    let sse3 = sse(vec![
        ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
        ev_completed_with_tokens("r3", 200),
    ]);

    // Only the first user message, and not a summarization request.
    let first_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains(FIRST_AUTO_MSG)
            && !body.contains(SECOND_AUTO_MSG)
            && !body.contains("You have exceeded the maximum number of tokens")
    };
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .and(first_matcher)
        .respond_with(sse_response(sse1))
        .mount(&server)
        .await;

    // Both user messages, and still not a summarization request.
    let second_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains(SECOND_AUTO_MSG)
            && body.contains(FIRST_AUTO_MSG)
            && !body.contains("You have exceeded the maximum number of tokens")
    };
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .and(second_matcher)
        .respond_with(sse_response(sse2))
        .mount(&server)
        .await;

    // The summarization request, recognised by its instructions text.
    let third_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("You have exceeded the maximum number of tokens")
    };
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .and(third_matcher)
        .respond_with(sse_response(sse3))
        .mount(&server)
        .await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers()["openai"].clone()
    };

    let home = TempDir::new().unwrap();
    let mut config = load_default_config_for_test(&home);
    config.model_provider = model_provider;
    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
    let NewConversation {
        conversation: codex,
        session_configured,
        ..
    } = conversation_manager.new_conversation(config).await.unwrap();

    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
                text: FIRST_AUTO_MSG.into(),
            }],
        })
        .await
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
                text: SECOND_AUTO_MSG.into(),
            }],
        })
        .await
        .unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    // Shut down before reading the rollout file.
    codex.submit(Op::Shutdown).await.unwrap();
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await;

    let rollout_path = session_configured.rollout_path;
    let text = std::fs::read_to_string(&rollout_path).unwrap_or_else(|e| {
        panic!(
            "failed to read rollout file {}: {e}",
            rollout_path.display()
        )
    });

    // Count TurnContext entries; blank lines and lines that do not parse as a
    // rollout entry are skipped.
    let mut turn_context_count = 0usize;
    for line in text.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() {
            continue;
        }
        let Ok(entry): Result<RolloutLine, _> = serde_json::from_str(trimmed) else {
            continue;
        };
        match entry.item {
            RolloutItem::TurnContext(_) => {
                turn_context_count += 1;
            }
            // Compacted entries are recognised but not checked.
            RolloutItem::Compacted(_) => {}
            _ => {}
        }
    }

    assert!(
        turn_context_count >= 2,
        "expected at least two turn context entries, got {turn_context_count}"
    );
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_stops_after_failed_attempt() {
    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
        return;
    }

    let server = start_mock_server().await;

    let sse1 = sse(vec![
        ev_assistant_message("m1", FIRST_REPLY),
        ev_completed_with_tokens("r1", 500),
    ]);

    let sse2 = sse(vec![
        ev_assistant_message("m2", SUMMARY_TEXT),
        ev_completed_with_tokens("r2", 50),
    ]);

    let sse3 = sse(vec![
        ev_assistant_message("m3", STILL_TOO_BIG_REPLY),
        ev_completed_with_tokens("r3", 500),
    ]);

    let first_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains(FIRST_AUTO_MSG)
            && !body.contains("You have exceeded the maximum number of tokens")
    };
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .and(first_matcher)
        .respond_with(sse_response(sse1.clone()))
        .mount(&server)
        .await;

    let second_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        body.contains("You have exceeded the maximum number of tokens")
    };
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .and(second_matcher)
        .respond_with(sse_response(sse2.clone()))
        .mount(&server)
        .await;

    let third_matcher = |req: &wiremock::Request| {
        let body = std::str::from_utf8(&req.body).unwrap_or("");
        !body.contains("You have exceeded the maximum number of tokens")
            && body.contains(SUMMARY_TEXT)
    };
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .and(third_matcher)
        .respond_with(sse_response(sse3.clone()))
        .mount(&server)
        .await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers()["openai"].clone()
    };

    let home = TempDir::new().unwrap();
    let mut config = load_default_config_for_test(&home);
    config.model_provider = model_provider;
    config.model_auto_compact_token_limit = Some(200);
    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
    let codex = conversation_manager
        .new_conversation(config)
        .await
        .unwrap()
        .conversation;

    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
                text: FIRST_AUTO_MSG.into(),
            }],
        })
        .await
        .unwrap();

    let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
    let EventMsg::Error(ErrorEvent { message }) = error_event else {
        panic!("expected error event");
    };
    assert!(
        message.contains("limit"),
        "error message should include limit information: {message}"
    );
    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;

    let requests = server.received_requests().await.unwrap();
    assert_eq!(
        requests.len(),
        3,
        "auto compact should attempt at most one summarization before erroring"
    );

    let last_body = requests[2].body_json::<serde_json::Value>().unwrap();
    let instructions = last_body
        .get("instructions")
        .and_then(|v| v.as_str())
        .unwrap_or_default();
    assert!(
        !instructions.contains("You have exceeded the maximum number of tokens"),
        "third request should be the follow-up turn, not another summarization"
    );
}

#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_events() {
    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
        return;
    }

    let server = start_mock_server().await;

    let sse1 = sse(vec![
        ev_assistant_message("m1", FIRST_REPLY),
        ev_completed_with_tokens("r1", 500),
    ]);
    let sse2 = sse(vec![
        ev_assistant_message("m2", FIRST_AUTO_SUMMARY),
        ev_completed_with_tokens("r2", 50),
    ]);
    let sse3 = sse(vec![
        ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
        ev_completed_with_tokens("r3", 150),
    ]);
    let sse4 = sse(vec![
        ev_assistant_message("m4", SECOND_LARGE_REPLY),
        ev_completed_with_tokens("r4", 450),
    ]);
    let sse5 = sse(vec![
        ev_assistant_message("m5", SECOND_AUTO_SUMMARY),
        ev_completed_with_tokens("r5", 60),
    ]);
    let sse6 = sse(vec![
        ev_assistant_message("m6", FINAL_REPLY),
        ev_completed_with_tokens("r6", 120),
    ]);

    #[derive(Clone)]
    struct SeqResponder {
        bodies: Arc<Vec<String>>,
        calls: Arc<AtomicUsize>,
        requests: Arc<Mutex<Vec<Vec<u8>>>>,
    }

    impl SeqResponder {
        fn new(bodies: Vec<String>) -> Self {
            Self {
                bodies: Arc::new(bodies),
                calls: Arc::new(AtomicUsize::new(0)),
                requests: Arc::new(Mutex::new(Vec::new())),
            }
        }

        fn recorded_requests(&self) -> Vec<Vec<u8>> {
            self.requests.lock().unwrap().clone()
        }
    }

    impl Respond for SeqResponder {
        fn respond(&self, req: &Request) -> ResponseTemplate {
            let idx = self.calls.fetch_add(1, Ordering::SeqCst);
            self.requests.lock().unwrap().push(req.body.clone());
            let body = self
                .bodies
                .get(idx)
                .unwrap_or_else(|| panic!("unexpected request index {idx}"))
                .clone();
            ResponseTemplate::new(200)
                .insert_header("content-type", "text/event-stream")
                .set_body_raw(body, "text/event-stream")
        }
    }

    let responder = SeqResponder::new(vec![sse1, sse2, sse3, sse4, sse5, sse6]);
    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .respond_with(responder.clone())
        .expect(6)
        .mount(&server)
        .await;

    let model_provider = ModelProviderInfo {
        base_url: Some(format!("{}/v1", server.uri())),
        ..built_in_model_providers()["openai"].clone()
    };

    let home = TempDir::new().unwrap();
    let mut config = load_default_config_for_test(&home);
    config.model_provider = model_provider;
    config.model_auto_compact_token_limit = Some(200);
    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
    let codex = conversation_manager
        .new_conversation(config)
        .await
        .unwrap()
        .conversation;

    codex
        .submit(Op::UserInput {
            items: vec![InputItem::Text {
                text: MULTI_AUTO_MSG.into(),
            }],
        })
        .await
        .unwrap();

    loop {
        let event = codex.next_event().await.unwrap();
        if let EventMsg::TaskComplete(_) = &event.msg
            && !event.id.starts_with("auto-compact-")
        {
            break;
        }
    }

    let request_bodies: Vec<String> = responder
        .recorded_requests()
        .into_iter()
        .map(|body| String::from_utf8(body).unwrap_or_default())
        .collect();
    assert_eq!(
        request_bodies.len(),
        6,
        "expected six requests including two auto compactions"
    );
    assert!(
        request_bodies[0].contains(MULTI_AUTO_MSG),
        "first request should contain the user input"
    );
    assert!(
        request_bodies[1].contains("You have exceeded the maximum number of tokens"),
        "first auto compact request should use summarization instructions"
    );
    assert!(
        request_bodies[3].contains(&format!("unsupported call: {DUMMY_FUNCTION_NAME}")),
        "function call output should be sent before the second auto compact"
    );
    assert!(
        request_bodies[4].contains("You have exceeded the maximum number of tokens"),
        "second auto compact request should reuse summarization instructions"
    );
}
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clears the previous context to free the context window
- The operation doesn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								#![expect(clippy::unwrap_used)]
-												Move CodexAuth and AuthManager to the core crate (#3074)

Fix a long standing layering issue.
											
										
										
											2025-09-02 18:36:19 -07:00
+								use codex_core::CodexAuth;
-												chore: introduce ConversationManager as a clearinghouse for all conversations (#2240)

This PR does two things because after I got deep into the first one I
started pulling on the thread to the second:

- Makes `ConversationManager` the place where all in-memory
conversations are created and stored. Previously, `MessageProcessor` in
the `codex-mcp-server` crate was doing this via its `session_map`, but
this is something that should be done in `codex-core`.
- It unwinds the `ctrl_c: tokio::sync::Notify` that was threaded
throughout our code. I think this made sense at one time, but now that
we handle Ctrl-C within the TUI and have a proper `Op::Interrupt` event,
I don't think this was quite right, so I removed it. For `codex exec`
and `codex proto`, we now use `tokio::signal::ctrl_c()` directly, but we
no longer make `Notify` a field of `Codex` or `CodexConversation`.

Changes of note:

- Adds the files `conversation_manager.rs` and `codex_conversation.rs`
to `codex-core`.
- `Codex` and `CodexSpawnOk` are no longer exported from `codex-core`:
other crates must use `CodexConversation` instead (which is created via
`ConversationManager`).
- `core/src/codex_wrapper.rs` has been deleted in favor of
`ConversationManager`.
- `ConversationManager::new_conversation()` returns `NewConversation`,
which is in line with the `new_conversation` tool we want to add to the
MCP server. Note `NewConversation` includes `SessionConfiguredEvent`, so
we eliminate checks in cases like `codex-rs/core/tests/client.rs` to
verify `SessionConfiguredEvent` is the first event because that is now
internal to `ConversationManager`.
- Quite a bit of code was deleted from
`codex-rs/mcp-server/src/message_processor.rs` since it no longer has to
manage multiple conversations itself: it goes through
`ConversationManager` instead.
- `core/tests/live_agent.rs` has been deleted because I had to update a
bunch of tests and all the tests in here were ignored, and I don't think
anyone ever ran them, so this was just technical debt, at this point.
- Removed `notify_on_sigint()` from `util.rs` (and in a follow-up, I
hope to refactor the blandly-named `util.rs` into more descriptive
files).
- In general, I started renaming local variables named `codex` to
`conversation`, where appropriate, though admittedly I didn't do it
throughout all the integration tests because that would have added a lot of
noise to this PR.




---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/2240).
* #2264
* #2263
* __->__ #2240
											
										
										
											2025-08-13 13:38:18 -07:00
+								use codex_core::ConversationManager;
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clears the previous context to free the context window
- The operation doesn't use `run_task`, to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								use codex_core::ModelProviderInfo;
-												Add Compact and Turn Context to the rollout items (#3444)

Adding compact and turn context to the rollout items

based on #3440
											
										
										
											2025-09-11 11:08:51 -07:00
+								use codex_core::NewConversation;
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								use codex_core::built_in_model_providers;
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window becomes too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user messages + the
summary, rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								use codex_core::protocol::ErrorEvent;
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								use codex_core::protocol::EventMsg;
 								use codex_core::protocol::InputItem;
 								use codex_core::protocol::Op;
-												Add Compact and Turn Context to the rollout items (#3444)

Adding compact and turn context to the rollout items

based on #3440
											
										
										
											2025-09-11 11:08:51 -07:00
+								use codex_core::protocol::RolloutItem;
 								use codex_core::protocol::RolloutLine;
-												ci fix (#1782)


											
										
										
											2025-08-01 09:17:13 -07:00
+								use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								use core_test_support::load_default_config_for_test;
 								use core_test_support::wait_for_event;
 								use serde_json::Value;
 								use tempfile::TempDir;
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								use wiremock::BodyPrintLimit;
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								use wiremock::Mock;
 								use wiremock::MockServer;
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								use wiremock::Request;
 								use wiremock::Respond;
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								use wiremock::ResponseTemplate;
 								use wiremock::matchers::method;
 								use wiremock::matchers::path;
 								use pretty_assertions::assert_eq;
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								use std::sync::Arc;
 								use std::sync::Mutex;
 								use std::sync::atomic::AtomicUsize;
 								use std::sync::atomic::Ordering;
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
 								// --- Test helpers -----------------------------------------------------------
 								/// Render a sequence of JSON events as the text of an SSE response body.
 								///
 								/// Every event contributes an `event: <type>` line, where `<type>` is the event's
 								/// string-valued `"type"` field; the helper panics if that field is missing or is
 								/// not a string. An event that is a JSON object with exactly one key is followed
 								/// only by a blank line; any other event is followed by `data: <json>` and a
 								/// blank line.
 								fn sse(events: Vec<Value>) -> String {
 								    use std::fmt::Write as _;
 								    events.into_iter().fold(String::new(), |mut body, event| {
 								        let kind = event.get("type").and_then(|field| field.as_str()).unwrap();
 								        writeln!(body, "event: {kind}").unwrap();
 								        // A single-key object holds nothing besides its `"type"`, so no `data:` line.
 								        let type_only = matches!(event.as_object(), Some(fields) if fields.len() == 1);
 								        if type_only {
 								            body.push('\n');
 								        } else {
 								            write!(body, "data: {event}\n\n").unwrap();
 								        }
 								        body
 								    })
 								}
 								/// Build a `response.completed` SSE event for the response identified by `id`.
 								///
 								/// The usage block reports `0` for the input, output and total token counts and
 								/// `null` for both token-details fields.
 								fn ev_completed(id: &str) -> Value {
 								    let usage = serde_json::json!({
 								        "input_tokens": 0,
 								        "input_tokens_details": null,
 								        "output_tokens": 0,
 								        "output_tokens_details": null,
 								        "total_tokens": 0
 								    });
 								    let response = serde_json::json!({ "id": id, "usage": usage });
 								    serde_json::json!({ "type": "response.completed", "response": response })
 								}
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								fn ev_completed_with_tokens(id: &str, total_tokens: u64) -> Value {
 								    // `response.completed` event for `id`: `total_tokens` is reported as both the
 								    // input token count and the total, with zero output tokens and null details.
 								    let usage = serde_json::json!({
 								        "input_tokens": total_tokens,
 								        "input_tokens_details": null,
 								        "output_tokens": 0,
 								        "output_tokens_details": null,
 								        "total_tokens": total_tokens
 								    });
 								    let response = serde_json::json!({ "id": id, "usage": usage });
 								    serde_json::json!({ "type": "response.completed", "response": response })
 								}
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								/// Convenience: SSE event for a single assistant message output item.
 								///
 								/// The event has type `response.output_item.done`; its item is a `message` with
 								/// role `assistant`, the given `id`, and one `output_text` content entry holding
 								/// `text`.
 								fn ev_assistant_message(id: &str, text: &str) -> Value {
 								    let content = serde_json::json!([{ "type": "output_text", "text": text }]);
 								    let item = serde_json::json!({
 								        "type": "message",
 								        "role": "assistant",
 								        "id": id,
 								        "content": content
 								    });
 								    serde_json::json!({ "type": "response.output_item.done", "item": item })
 								}
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								fn ev_function_call(call_id: &str, name: &str, arguments: &str) -> Value {
 								    // `response.output_item.done` event whose item is a `function_call` carrying
 								    // the given call id, function name and (string-encoded) arguments.
 								    let item = serde_json::json!({
 								        "type": "function_call",
 								        "call_id": call_id,
 								        "name": name,
 								        "arguments": arguments
 								    });
 								    serde_json::json!({ "type": "response.output_item.done", "item": item })
 								}
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								fn sse_response(body: String) -> ResponseTemplate {
 								    // 200 response that serves `body` with the event-stream content type, set both
 								    // as an explicit header and as the MIME type of the raw body.
 								    let template = ResponseTemplate::new(200);
 								    let template = template.insert_header("content-type", "text/event-stream");
 								    template.set_body_raw(body, "text/event-stream")
 								}
 								/// Mount on `server` a mock for `POST /v1/responses` requests that also satisfy
 								/// `matcher`, answering with `body` wrapped by `sse_response`.
 								///
 								/// The mock is registered with `expect(1)`, i.e. it expects exactly one matching
 								/// request.
 								async fn mount_sse_once<M>(server: &MockServer, matcher: M, body: String)
 								where
 								    M: wiremock::Match + Send + Sync + 'static,
 								{
 								    let responder = sse_response(body);
 								    let mock = Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .and(matcher)
 								        .respond_with(responder)
 								        .expect(1);
 								    mock.mount(server).await;
 								}
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								async fn start_mock_server() -> MockServer {
 								    // Start a mock server whose body print limit is `BodyPrintLimit::Limited(80_000)`.
 								    let builder = MockServer::builder().body_print_limit(BodyPrintLimit::Limited(80_000));
 								    builder.start().await
 								}
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								const FIRST_REPLY: &str = "FIRST_REPLY"; // assistant text served by the first mocked response
 								const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT"; // assistant text served by the mocked summarization response
 								const SUMMARIZE_TRIGGER: &str = "Start Summarization"; // user text the tests look for to recognise the summarization request
 								const THIRD_USER_MSG: &str = "next turn"; // user text submitted after the compaction step
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								const AUTO_SUMMARY_TEXT: &str = "AUTO_SUMMARY"; // NOTE(review): this constant and the ten below are not referenced in the part of the file reviewed here; presumably fixtures for auto-compaction tests — confirm
 								const FIRST_AUTO_MSG: &str = "token limit start";
 								const SECOND_AUTO_MSG: &str = "token limit push";
 								const STILL_TOO_BIG_REPLY: &str = "STILL_TOO_BIG";
 								const MULTI_AUTO_MSG: &str = "multi auto";
 								const SECOND_LARGE_REPLY: &str = "SECOND_LARGE_REPLY";
 								const FIRST_AUTO_SUMMARY: &str = "FIRST_AUTO_SUMMARY";
 								const SECOND_AUTO_SUMMARY: &str = "SECOND_AUTO_SUMMARY";
 								const FINAL_REPLY: &str = "FINAL_REPLY";
 								const DUMMY_FUNCTION_NAME: &str = "unsupported_tool";
 								const DUMMY_CALL_ID: &str = "call-multi-auto";
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
 								#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 								async fn summarize_context_three_requests_and_instructions() {
 								    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
 								        println!(
 								            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
 								        );
 								        return;
 								    }
 								    // Set up a mock server that we can inspect after the run.
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								    let server = start_mock_server().await;
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
 								    // SSE 1: assistant replies normally so it is recorded in history.
 								    let sse1 = sse(vec![
 								        ev_assistant_message("m1", FIRST_REPLY),
 								        ev_completed("r1"),
 								    ]);
 								    // SSE 2: summarizer returns a summary message.
 								    let sse2 = sse(vec![
 								        ev_assistant_message("m2", SUMMARY_TEXT),
 								        ev_completed("r2"),
 								    ]);
 								    // SSE 3: minimal completed; we only need to capture the request body.
 								    let sse3 = sse(vec![ev_completed("r3")]);
 								    // Mount three expectations, one per request, matched by body content.
 								    let first_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains("\"text\":\"hello world\"")
 								            && !body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
 								    };
 								    mount_sse_once(&server, first_matcher, sse1).await;
 								    let second_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains(&format!("\"text\":\"{SUMMARIZE_TRIGGER}\""))
 								    };
 								    mount_sse_once(&server, second_matcher, sse2).await;
 								    let third_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains(&format!("\"text\":\"{THIRD_USER_MSG}\""))
 								    };
 								    mount_sse_once(&server, third_matcher, sse3).await;
 								    // Build config pointing to the mock server and spawn Codex.
 								    let model_provider = ModelProviderInfo {
 								        base_url: Some(format!("{}/v1", server.uri())),
 								        ..built_in_model_providers()["openai"].clone()
 								    };
 								    let home = TempDir::new().unwrap();
 								    let mut config = load_default_config_for_test(&home);
 								    config.model_provider = model_provider;
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								    config.model_auto_compact_token_limit = Some(200_000);
-												Add AuthManager and enhance GetAuthStatus command (#2577)

This PR adds a central `AuthManager` struct that manages the auth
information used across conversations and the MCP server. Prior to this,
each conversation and the MCP server got their own private snapshots of
the auth information, and changes to one (such as a logout or token
refresh) were not seen by others.

This is especially problematic when multiple instances of the CLI are
run. For example, consider the case where you start CLI 1 and log in to
ChatGPT account X and then start CLI 2 and log out and then log in to
ChatGPT account Y. The conversation in CLI 1 is still using account X,
but if you create a new conversation, it will suddenly (and
unexpectedly) switch to account Y.

With the `AuthManager`, auth information is read from disk at the time
the `ConversationManager` is constructed, and it is cached in memory.
All new conversations use this same auth information, as do any token
refreshes.

The `AuthManager` is also used by the MCP server's GetAuthStatus
command, which now returns the auth method currently used by the MCP
server.

This PR also includes an enhancement to the GetAuthStatus command. It
now accepts two new (optional) input parameters: `include_token` and
`refresh_token`. Callers can use this to request the in-use auth token
and can optionally request to refresh the token.

The PR also adds tests for the login and auth APIs that I recently added
to the MCP server.
											
										
										
											2025-08-22 13:10:11 -07:00
+								    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
-												Add Compact and Turn Context to the rollout items (#3444)

Adding compact and turn context to the rollout items

based on #3440
											
										
										
											2025-09-11 11:08:51 -07:00
+								    let NewConversation {
 								        conversation: codex,
 								        session_configured,
 								        ..
 								    } = conversation_manager.new_conversation(config).await.unwrap();
 								    let rollout_path = session_configured.rollout_path;
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
 								    // 1) Normal user input – should hit server once.
 								    codex
 								        .submit(Op::UserInput {
 								            items: vec![InputItem::Text {
 								                text: "hello world".into(),
 								            }],
 								        })
 								        .await
 								        .unwrap();
 								    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 								    // 2) Summarize – second hit with summarization instructions.
 								    codex.submit(Op::Compact).await.unwrap();
 								    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 								    // 3) Next user input – third hit; history should include only the summary.
 								    codex
 								        .submit(Op::UserInput {
 								            items: vec![InputItem::Text {
 								                text: THIRD_USER_MSG.into(),
 								            }],
 								        })
 								        .await
 								        .unwrap();
 								    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 								    // Inspect the three captured requests.
 								    let requests = server.received_requests().await.unwrap();
 								    assert_eq!(requests.len(), 3, "expected exactly three requests");
 								    let req1 = &requests[0];
 								    let req2 = &requests[1];
 								    let req3 = &requests[2];
 								    let body1 = req1.body_json::<serde_json::Value>().unwrap();
 								    let body2 = req2.body_json::<serde_json::Value>().unwrap();
 								    let body3 = req3.body_json::<serde_json::Value>().unwrap();
 								    // System instructions should change for the summarization turn.
 								    let instr1 = body1.get("instructions").and_then(|v| v.as_str()).unwrap();
 								    let instr2 = body2.get("instructions").and_then(|v| v.as_str()).unwrap();
 								    assert_ne!(
 								        instr1, instr2,
 								        "summarization should override base instructions"
 								    );
 								    assert!(
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								        instr2.contains("You have exceeded the maximum number of tokens"),
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								        "summarization instructions not applied"
 								    );
 								    // The summarization request should include the injected user input marker.
 								    let input2 = body2.get("input").and_then(|v| v.as_array()).unwrap();
 								    // The last item is the user message created from the injected input.
 								    let last2 = input2.last().unwrap();
 								    assert_eq!(last2.get("type").unwrap().as_str().unwrap(), "message");
 								    assert_eq!(last2.get("role").unwrap().as_str().unwrap(), "user");
 								    let text2 = last2["content"][0]["text"].as_str().unwrap();
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								    assert!(
 								        text2.contains(SUMMARIZE_TRIGGER),
 								        "expected summarize trigger, got `{text2}`"
 								    );
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								    // Third request must contain the refreshed instructions, bridge summary message and new user msg.
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								    let input3 = body3.get("input").and_then(|v| v.as_array()).unwrap();
 								    println!("third request body: {body3}");
 								    assert!(
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								        input3.len() >= 3,
 								        "expected refreshed context and new user message in third request"
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								    );
 								    // Collect all (role, text) message tuples.
 								    let mut messages: Vec<(String, String)> = Vec::new();
 								    for item in input3 {
 								        if item["type"].as_str() == Some("message") {
 								            let role = item["role"].as_str().unwrap_or_default().to_string();
 								            let text = item["content"][0]["text"]
 								                .as_str()
 								                .unwrap_or_default()
 								                .to_string();
 								            messages.push((role, text));
 								        }
 								    }
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								    // No previous assistant messages should remain and the new user message is present.
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								    let assistant_count = messages.iter().filter(|(r, _)| r == "assistant").count();
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window become too large
2. Add a user turn, asking for the model to summarize
3. Build a bridge that contains all the previous user message + the
summary. Rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								    assert_eq!(assistant_count, 0, "assistant history should be cleared");
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clear the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								    assert!(
 								        messages
 								            .iter()
 								            .any(|(r, t)| r == "user" && t == THIRD_USER_MSG),
 								        "third request should include the new user message"
 								    );
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window becomes too large
2. Adds a user turn asking the model to summarize
3. Builds a bridge that contains all the previous user messages + the
summary, rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								    let Some((_, bridge_text)) = messages.iter().find(|(role, text)| {
 								        role == "user"
 								            && (text.contains("Here were the user messages")
 								                || text.contains("Here are all the user messages"))
 								            && text.contains(SUMMARY_TEXT)
 								    }) else {
 								        panic!("expected a bridge message containing the summary");
 								    };
 								    assert!(
 								        bridge_text.contains("hello world"),
 								        "bridge should capture earlier user messages"
 								    );
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clears the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								    assert!(
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window becomes too large
2. Adds a user turn asking the model to summarize
3. Builds a bridge that contains all the previous user messages + the
summary, rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								        !bridge_text.contains(SUMMARIZE_TRIGGER),
 								        "bridge text should not echo the summarize trigger"
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clears the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								    );
 								    assert!(
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window becomes too large
2. Adds a user turn asking the model to summarize
3. Builds a bridge that contains all the previous user messages + the
summary, rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								        !messages
 								            .iter()
 								            .any(|(_, text)| text.contains(SUMMARIZE_TRIGGER)),
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clears the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								        "third request should not include the summarize trigger"
 								    );
-												Add Compact and Turn Context to the rollout items (#3444)

Adding compact and turn context to the rollout items

based on #3440
											
										
										
											2025-09-11 11:08:51 -07:00
 								    // Shut down Codex to flush rollout entries before inspecting the file.
 								    codex.submit(Op::Shutdown).await.unwrap();
 								    wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await;
 								    // Verify rollout contains APITurn entries for each API call and a Compacted entry.
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window becomes too large
2. Adds a user turn asking the model to summarize
3. Builds a bridge that contains all the previous user messages + the
summary, rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
+								    println!("rollout path: {}", rollout_path.display());
-												Add Compact and Turn Context to the rollout items (#3444)

Adding compact and turn context to the rollout items

based on #3440
											
										
										
											2025-09-11 11:08:51 -07:00
+								    let text = std::fs::read_to_string(&rollout_path).unwrap_or_else(|e| {
 								        panic!(
 								            "failed to read rollout file {}: {e}",
 								            rollout_path.display()
 								        )
 								    });
 								    let mut api_turn_count = 0usize;
 								    let mut saw_compacted_summary = false;
 								    for line in text.lines() {
 								        let trimmed = line.trim();
 								        if trimmed.is_empty() {
 								            continue;
 								        }
 								        let Ok(entry): Result<RolloutLine, _> = serde_json::from_str(trimmed) else {
 								            continue;
 								        };
 								        match entry.item {
 								            RolloutItem::TurnContext(_) => {
 								                api_turn_count += 1;
 								            }
 								            RolloutItem::Compacted(ci) => {
 								                if ci.message == SUMMARY_TEXT {
 								                    saw_compacted_summary = true;
 								                }
 								            }
 								            _ => {}
 								        }
 								    }
 								    assert!(
 								        api_turn_count == 3,
 								        "expected three APITurn entries in rollout"
 								    );
 								    assert!(
 								        saw_compacted_summary,
 								        "expected a Compacted entry containing the summarizer output"
 								    );
-												Add /compact (#1527)

- Add operation to summarize the context so far.
- The operation runs a compact task that summarizes the context.
- The operation clears the previous context to free the context window
- The operation didn't use `run_task` to avoid corrupting the session
- Add /compact in the tui



https://github.com/user-attachments/assets/e06c24e5-dcfb-4806-934a-564d425a919c
											
										
										
											2025-07-31 21:34:32 -07:00
+								}
-												feat: context compaction (#3446)

## Compact feature:
1. Stops the model when the context window becomes too large
2. Adds a user turn asking the model to summarize
3. Builds a bridge that contains all the previous user messages + the
summary, rendered from a template
4. Start sampling again from a clean conversation with only that bridge
											
										
										
											2025-09-12 13:07:10 -07:00
 								#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 								async fn auto_compact_runs_after_token_limit_hit() {
 								    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
 								        println!(
 								            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
 								        );
 								        return;
 								    }
 								    let server = start_mock_server().await;
 								    let sse1 = sse(vec![
 								        ev_assistant_message("m1", FIRST_REPLY),
 								        ev_completed_with_tokens("r1", 70_000),
 								    ]);
 								    let sse2 = sse(vec![
 								        ev_assistant_message("m2", "SECOND_REPLY"),
 								        ev_completed_with_tokens("r2", 330_000),
 								    ]);
 								    let sse3 = sse(vec![
 								        ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
 								        ev_completed_with_tokens("r3", 200),
 								    ]);
 								    let first_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains(FIRST_AUTO_MSG)
 								            && !body.contains(SECOND_AUTO_MSG)
 								            && !body.contains("You have exceeded the maximum number of tokens")
 								    };
 								    Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .and(first_matcher)
 								        .respond_with(sse_response(sse1))
 								        .mount(&server)
 								        .await;
 								    let second_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains(SECOND_AUTO_MSG)
 								            && body.contains(FIRST_AUTO_MSG)
 								            && !body.contains("You have exceeded the maximum number of tokens")
 								    };
 								    Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .and(second_matcher)
 								        .respond_with(sse_response(sse2))
 								        .mount(&server)
 								        .await;
 								    let third_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains("You have exceeded the maximum number of tokens")
 								    };
 								    Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .and(third_matcher)
 								        .respond_with(sse_response(sse3))
 								        .mount(&server)
 								        .await;
 								    let model_provider = ModelProviderInfo {
 								        base_url: Some(format!("{}/v1", server.uri())),
 								        ..built_in_model_providers()["openai"].clone()
 								    };
 								    let home = TempDir::new().unwrap();
 								    let mut config = load_default_config_for_test(&home);
 								    config.model_provider = model_provider;
 								    config.model_auto_compact_token_limit = Some(200_000);
 								    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
 								    let codex = conversation_manager
 								        .new_conversation(config)
 								        .await
 								        .unwrap()
 								        .conversation;
 								    codex
 								        .submit(Op::UserInput {
 								            items: vec![InputItem::Text {
 								                text: FIRST_AUTO_MSG.into(),
 								            }],
 								        })
 								        .await
 								        .unwrap();
 								    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 								    codex
 								        .submit(Op::UserInput {
 								            items: vec![InputItem::Text {
 								                text: SECOND_AUTO_MSG.into(),
 								            }],
 								        })
 								        .await
 								        .unwrap();
 								    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 								    // wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 								    let requests = server.received_requests().await.unwrap();
 								    assert_eq!(requests.len(), 3, "auto compact should add a third request");
 								    let body3 = requests[2].body_json::<serde_json::Value>().unwrap();
 								    let instructions = body3
 								        .get("instructions")
 								        .and_then(|v| v.as_str())
 								        .unwrap_or_default();
 								    assert!(
 								        instructions.contains("You have exceeded the maximum number of tokens"),
 								        "auto compact should reuse summarization instructions"
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 								async fn auto_compact_persists_rollout_entries() {
 								    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
 								        println!(
 								            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
 								        );
 								        return;
 								    }
 								    let server = start_mock_server().await;
 								    let sse1 = sse(vec![
 								        ev_assistant_message("m1", FIRST_REPLY),
 								        ev_completed_with_tokens("r1", 70_000),
 								    ]);
 								    let sse2 = sse(vec![
 								        ev_assistant_message("m2", "SECOND_REPLY"),
 								        ev_completed_with_tokens("r2", 330_000),
 								    ]);
 								    let sse3 = sse(vec![
 								        ev_assistant_message("m3", AUTO_SUMMARY_TEXT),
 								        ev_completed_with_tokens("r3", 200),
 								    ]);
 								    let first_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains(FIRST_AUTO_MSG)
 								            && !body.contains(SECOND_AUTO_MSG)
 								            && !body.contains("You have exceeded the maximum number of tokens")
 								    };
 								    Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .and(first_matcher)
 								        .respond_with(sse_response(sse1))
 								        .mount(&server)
 								        .await;
 								    let second_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains(SECOND_AUTO_MSG)
 								            && body.contains(FIRST_AUTO_MSG)
 								            && !body.contains("You have exceeded the maximum number of tokens")
 								    };
 								    Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .and(second_matcher)
 								        .respond_with(sse_response(sse2))
 								        .mount(&server)
 								        .await;
 								    let third_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains("You have exceeded the maximum number of tokens")
 								    };
 								    Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .and(third_matcher)
 								        .respond_with(sse_response(sse3))
 								        .mount(&server)
 								        .await;
 								    let model_provider = ModelProviderInfo {
 								        base_url: Some(format!("{}/v1", server.uri())),
 								        ..built_in_model_providers()["openai"].clone()
 								    };
 								    let home = TempDir::new().unwrap();
 								    let mut config = load_default_config_for_test(&home);
 								    config.model_provider = model_provider;
 								    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
 								    let NewConversation {
 								        conversation: codex,
 								        session_configured,
 								        ..
 								    } = conversation_manager.new_conversation(config).await.unwrap();
 								    codex
 								        .submit(Op::UserInput {
 								            items: vec![InputItem::Text {
 								                text: FIRST_AUTO_MSG.into(),
 								            }],
 								        })
 								        .await
 								        .unwrap();
 								    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 								    codex
 								        .submit(Op::UserInput {
 								            items: vec![InputItem::Text {
 								                text: SECOND_AUTO_MSG.into(),
 								            }],
 								        })
 								        .await
 								        .unwrap();
 								    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 								    codex.submit(Op::Shutdown).await.unwrap();
 								    wait_for_event(&codex, |ev| matches!(ev, EventMsg::ShutdownComplete)).await;
 								    let rollout_path = session_configured.rollout_path;
 								    let text = std::fs::read_to_string(&rollout_path).unwrap_or_else(|e| {
 								        panic!(
 								            "failed to read rollout file {}: {e}",
 								            rollout_path.display()
 								        )
 								    });
 								    let mut turn_context_count = 0usize;
 								    for line in text.lines() {
 								        let trimmed = line.trim();
 								        if trimmed.is_empty() {
 								            continue;
 								        }
 								        let Ok(entry): Result<RolloutLine, _> = serde_json::from_str(trimmed) else {
 								            continue;
 								        };
 								        match entry.item {
 								            RolloutItem::TurnContext(_) => {
 								                turn_context_count += 1;
 								            }
 								            RolloutItem::Compacted(_) => {}
 								            _ => {}
 								        }
 								    }
 								    assert!(
 								        turn_context_count >= 2,
 								        "expected at least two turn context entries, got {turn_context_count}"
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 								async fn auto_compact_stops_after_failed_attempt() {
 								    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
 								        println!(
 								            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
 								        );
 								        return;
 								    }
 								    let server = start_mock_server().await;
 								    let sse1 = sse(vec![
 								        ev_assistant_message("m1", FIRST_REPLY),
 								        ev_completed_with_tokens("r1", 500),
 								    ]);
 								    let sse2 = sse(vec![
 								        ev_assistant_message("m2", SUMMARY_TEXT),
 								        ev_completed_with_tokens("r2", 50),
 								    ]);
 								    let sse3 = sse(vec![
 								        ev_assistant_message("m3", STILL_TOO_BIG_REPLY),
 								        ev_completed_with_tokens("r3", 500),
 								    ]);
 								    let first_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains(FIRST_AUTO_MSG)
 								            && !body.contains("You have exceeded the maximum number of tokens")
 								    };
 								    Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .and(first_matcher)
 								        .respond_with(sse_response(sse1.clone()))
 								        .mount(&server)
 								        .await;
 								    let second_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        body.contains("You have exceeded the maximum number of tokens")
 								    };
 								    Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .and(second_matcher)
 								        .respond_with(sse_response(sse2.clone()))
 								        .mount(&server)
 								        .await;
 								    let third_matcher = |req: &wiremock::Request| {
 								        let body = std::str::from_utf8(&req.body).unwrap_or("");
 								        !body.contains("You have exceeded the maximum number of tokens")
 								            && body.contains(SUMMARY_TEXT)
 								    };
 								    Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .and(third_matcher)
 								        .respond_with(sse_response(sse3.clone()))
 								        .mount(&server)
 								        .await;
 								    let model_provider = ModelProviderInfo {
 								        base_url: Some(format!("{}/v1", server.uri())),
 								        ..built_in_model_providers()["openai"].clone()
 								    };
 								    let home = TempDir::new().unwrap();
 								    let mut config = load_default_config_for_test(&home);
 								    config.model_provider = model_provider;
 								    config.model_auto_compact_token_limit = Some(200);
 								    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
 								    let codex = conversation_manager
 								        .new_conversation(config)
 								        .await
 								        .unwrap()
 								        .conversation;
 								    codex
 								        .submit(Op::UserInput {
 								            items: vec![InputItem::Text {
 								                text: FIRST_AUTO_MSG.into(),
 								            }],
 								        })
 								        .await
 								        .unwrap();
 								    let error_event = wait_for_event(&codex, |ev| matches!(ev, EventMsg::Error(_))).await;
 								    let EventMsg::Error(ErrorEvent { message }) = error_event else {
 								        panic!("expected error event");
 								    };
 								    assert!(
 								        message.contains("limit"),
 								        "error message should include limit information: {message}"
 								    );
 								    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
 								    let requests = server.received_requests().await.unwrap();
 								    assert_eq!(
 								        requests.len(),
 ,
 								        "auto compact should attempt at most one summarization before erroring"
 								    );
 								    let last_body = requests[2].body_json::<serde_json::Value>().unwrap();
 								    let instructions = last_body
 								        .get("instructions")
 								        .and_then(|v| v.as_str())
 								        .unwrap_or_default();
 								    assert!(
 								        !instructions.contains("You have exceeded the maximum number of tokens"),
 								        "third request should be the follow-up turn, not another summarization"
 								    );
 								}
 								#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 								async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_events() {
 								    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
 								        println!(
 								            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
 								        );
 								        return;
 								    }
 								    let server = start_mock_server().await;
 								    let sse1 = sse(vec![
 								        ev_assistant_message("m1", FIRST_REPLY),
 								        ev_completed_with_tokens("r1", 500),
 								    ]);
 								    let sse2 = sse(vec![
 								        ev_assistant_message("m2", FIRST_AUTO_SUMMARY),
 								        ev_completed_with_tokens("r2", 50),
 								    ]);
 								    let sse3 = sse(vec![
 								        ev_function_call(DUMMY_CALL_ID, DUMMY_FUNCTION_NAME, "{}"),
 								        ev_completed_with_tokens("r3", 150),
 								    ]);
 								    let sse4 = sse(vec![
 								        ev_assistant_message("m4", SECOND_LARGE_REPLY),
 								        ev_completed_with_tokens("r4", 450),
 								    ]);
 								    let sse5 = sse(vec![
 								        ev_assistant_message("m5", SECOND_AUTO_SUMMARY),
 								        ev_completed_with_tokens("r5", 60),
 								    ]);
 								    let sse6 = sse(vec![
 								        ev_assistant_message("m6", FINAL_REPLY),
 								        ev_completed_with_tokens("r6", 120),
 								    ]);
 								    #[derive(Clone)]
 								    struct SeqResponder {
 								        bodies: Arc<Vec<String>>,
 								        calls: Arc<AtomicUsize>,
 								        requests: Arc<Mutex<Vec<Vec<u8>>>>,
 								    }
 								    impl SeqResponder {
 								        fn new(bodies: Vec<String>) -> Self {
 								            Self {
 								                bodies: Arc::new(bodies),
 								                calls: Arc::new(AtomicUsize::new(0)),
 								                requests: Arc::new(Mutex::new(Vec::new())),
 								            }
 								        }
 								        fn recorded_requests(&self) -> Vec<Vec<u8>> {
 								            self.requests.lock().unwrap().clone()
 								        }
 								    }
 								    impl Respond for SeqResponder {
 								        fn respond(&self, req: &Request) -> ResponseTemplate {
 								            let idx = self.calls.fetch_add(1, Ordering::SeqCst);
 								            self.requests.lock().unwrap().push(req.body.clone());
 								            let body = self
 								                .bodies
 								                .get(idx)
 								                .unwrap_or_else(|| panic!("unexpected request index {idx}"))
 								                .clone();
 								            ResponseTemplate::new(200)
 								                .insert_header("content-type", "text/event-stream")
 								                .set_body_raw(body, "text/event-stream")
 								        }
 								    }
 								    let responder = SeqResponder::new(vec![sse1, sse2, sse3, sse4, sse5, sse6]);
 								    Mock::given(method("POST"))
 								        .and(path("/v1/responses"))
 								        .respond_with(responder.clone())
 								        .expect(6)
 								        .mount(&server)
 								        .await;
 								    let model_provider = ModelProviderInfo {
 								        base_url: Some(format!("{}/v1", server.uri())),
 								        ..built_in_model_providers()["openai"].clone()
 								    };
 								    let home = TempDir::new().unwrap();
 								    let mut config = load_default_config_for_test(&home);
 								    config.model_provider = model_provider;
 								    config.model_auto_compact_token_limit = Some(200);
 								    let conversation_manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
 								    let codex = conversation_manager
 								        .new_conversation(config)
 								        .await
 								        .unwrap()
 								        .conversation;
 								    codex
 								        .submit(Op::UserInput {
 								            items: vec![InputItem::Text {
 								                text: MULTI_AUTO_MSG.into(),
 								            }],
 								        })
 								        .await
 								        .unwrap();
 								    loop {
 								        let event = codex.next_event().await.unwrap();
 								        if let EventMsg::TaskComplete(_) = &event.msg
 								            && !event.id.starts_with("auto-compact-")
 								        {
 								            break;
 								        }
 								    }
 								    let request_bodies: Vec<String> = responder
 								        .recorded_requests()
 								        .into_iter()
 								        .map(|body| String::from_utf8(body).unwrap_or_default())
 								        .collect();
 								    assert_eq!(
 								        request_bodies.len(),
 ,
 								        "expected six requests including two auto compactions"
 								    );
 								    assert!(
 								        request_bodies[0].contains(MULTI_AUTO_MSG),
 								        "first request should contain the user input"
 								    );
 								    assert!(
 								        request_bodies[1].contains("You have exceeded the maximum number of tokens"),
 								        "first auto compact request should use summarization instructions"
 								    );
 								    assert!(
 								        request_bodies[3].contains(&format!("unsupported call: {DUMMY_FUNCTION_NAME}")),
 								        "function call output should be sent before the second auto compact"
 								    );
 								    assert!(
 								        request_bodies[4].contains("You have exceeded the maximum number of tokens"),
 								        "second auto compact request should reuse summarization instructions"
 								    );
 								}