test: faster test execution in codex-core (#2633)

This dramatically improves the time to run `cargo test -p codex-core` (~25x speedup). Before: ``` cargo test -p codex-core 35.96s user 68.63s system 19% cpu 8:49.80 total ``` After: ``` cargo test -p codex-core 5.51s user 8.16s system 63% cpu 21.407 total ``` Both runs were measured "hot", i.e. on a 2nd run with no filesystem changes, to exclude compile times. The approach is inspired by [Delete Cargo Integration Tests](https://matklad.github.io/2021/02/27/delete-cargo-integration-tests.html): we move all test cases in tests/ into a single suite so that they compile into a single binary, because there is significant overhead for each test binary executed and because test execution is only parallelized within a single binary.
2025-08-24 11:10:53 -07:00
parent c6a52d611c
commit 32bbbbad61
56 changed files with 78 additions and 3 deletions
--- a/codex-rs/core/tests/suite/cli_stream.rs
+++ b/codex-rs/core/tests/suite/cli_stream.rs
@@ -0,0 +1,581 @@
+use assert_cmd::Command as AssertCommand;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use std::time::Duration;
+use std::time::Instant;
+use tempfile::TempDir;
+use uuid::Uuid;
+use walkdir::WalkDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+/// Streams a chat completion through the CLI against a mock server.
+///
+/// The flow is:
+/// 1. Start a mock server that answers like OpenAI's chat completions API
+/// 2. Point codex at it through a custom `mock` model provider
+/// 3. Send the prompt "hello?" and capture the CLI output
+/// 4. Check that "hi" is printed on exactly one line and that the mocked
+///    endpoint was hit exactly once
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chat_mode_stream_cli() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let server = MockServer::start().await;
+
+    // Canned SSE body: one content delta, one empty delta, then the terminator.
+    let sse_body = concat!(
+        "data: {\"choices\":[{\"delta\":{\"content\":\"hi\"}}]}\n\n",
+        "data: {\"choices\":[{\"delta\":{}}]}\n\n",
+        "data: [DONE]\n\n"
+    );
+    let sse_response = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_body, "text/event-stream");
+    Mock::given(method("POST"))
+        .and(path("/v1/chat/completions"))
+        .respond_with(sse_response)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let base_url = format!("{}/v1", server.uri());
+    let provider_override = format!(
+        "model_providers.mock={{ name = \"mock\", base_url = \"{base_url}\", env_key = \"PATH\", wire_api = \"chat\" }}"
+    );
+    let codex_home = TempDir::new().unwrap();
+
+    // Run the CLI through cargo with the mock provider selected.
+    let mut cli = AssertCommand::new("cargo");
+    cli.args([
+        "run",
+        "-p",
+        "codex-cli",
+        "--quiet",
+        "--",
+        "exec",
+        "--skip-git-repo-check",
+        "-c",
+        provider_override.as_str(),
+        "-c",
+        "model_provider=\"mock\"",
+        "-C",
+        env!("CARGO_MANIFEST_DIR"),
+        "hello?",
+    ])
+    .env("CODEX_HOME", codex_home.path())
+    .env("OPENAI_API_KEY", "dummy")
+    .env("OPENAI_BASE_URL", &base_url);
+
+    let output = cli.output().unwrap();
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    println!("Status: {}", output.status);
+    println!("Stdout:\n{}", stdout);
+    println!("Stderr:\n{}", String::from_utf8_lossy(&output.stderr));
+    assert!(output.status.success());
+
+    let hi_lines = stdout
+        .lines()
+        .map(str::trim)
+        .filter(|line| *line == "hi")
+        .count();
+    assert_eq!(hi_lines, 1, "Expected exactly one line with 'hi'");
+
+    server.verify().await;
+}
+
+/// Checks that `-c experimental_instructions_file=...` on the CLI replaces the
+/// built-in base instructions. The assertion is made against the request body
+/// captured by a mock OpenAI Responses endpoint.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn exec_cli_applies_experimental_instructions_file() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // The mock server records the request and replies with a minimal SSE
+    // stream that is enough to finish a single turn.
+    let server = MockServer::start().await;
+    let sse_body = concat!(
+        "data: {\"type\":\"response.created\",\"response\":{}}\n\n",
+        "data: {\"type\":\"response.completed\",\"response\":{\"id\":\"r1\"}}\n\n"
+    );
+    let sse_response = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_body, "text/event-stream");
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(sse_response)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    // Write an instructions file whose only content is a marker that we can
+    // later look for in the outbound request payload.
+    let instructions_dir = TempDir::new().unwrap();
+    let marker = "cli-experimental-instructions-marker";
+    let instructions_path = instructions_dir.path().join("instr.md");
+    std::fs::write(&instructions_path, marker).unwrap();
+    // Forward slashes keep the path valid inside a TOML string.
+    let instructions_path_str = instructions_path.to_string_lossy().replace('\\', "/");
+    let instructions_override =
+        format!("experimental_instructions_file=\"{instructions_path_str}\"");
+
+    // Provider override: talk to the mock server over the Responses API and
+    // read the "API key" from an env var that is always set.
+    let base_url = format!("{}/v1", server.uri());
+    let provider_override = format!(
+        "model_providers.mock={{ name = \"mock\", base_url = \"{base_url}\", env_key = \"PATH\", wire_api = \"responses\" }}"
+    );
+
+    let codex_home = TempDir::new().unwrap();
+    let mut cli = AssertCommand::new("cargo");
+    cli.args([
+        "run",
+        "-p",
+        "codex-cli",
+        "--quiet",
+        "--",
+        "exec",
+        "--skip-git-repo-check",
+        "-c",
+        provider_override.as_str(),
+        "-c",
+        "model_provider=\"mock\"",
+        "-c",
+        instructions_override.as_str(),
+        "-C",
+        env!("CARGO_MANIFEST_DIR"),
+        "hello?\n",
+    ])
+    .env("CODEX_HOME", codex_home.path())
+    .env("OPENAI_API_KEY", "dummy")
+    .env("OPENAI_BASE_URL", &base_url);
+
+    let output = cli.output().unwrap();
+    println!("Status: {}", output.status);
+    println!("Stdout:\n{}", String::from_utf8_lossy(&output.stdout));
+    println!("Stderr:\n{}", String::from_utf8_lossy(&output.stderr));
+    assert!(output.status.success());
+
+    // Pull the first captured request and confirm the marker made it into
+    // the `instructions` field of its JSON body.
+    let requests = server.received_requests().await.unwrap();
+    let request = &requests[0];
+    let body = request.body_json::<serde_json::Value>().unwrap();
+    let instructions = body
+        .get("instructions")
+        .and_then(serde_json::Value::as_str)
+        .unwrap_or_default()
+        .to_string();
+    assert!(
+        instructions.contains(marker),
+        "instructions did not contain custom marker; got: {instructions}"
+    );
+}
+
+/// Streams a Responses API reply through the CLI from a local SSE fixture.
+///
+/// The flow is:
+/// 1. Locate the pre-recorded SSE fixture under this crate's `tests/` directory
+/// 2. Hand its path to codex through the `CODEX_RS_SSE_FIXTURE` env var
+/// 3. Send the prompt "hello?"
+/// 4. Check that the fixture's text shows up in the CLI's stdout
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn responses_api_stream_cli() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let manifest_dir = std::path::Path::new(env!("CARGO_MANIFEST_DIR"));
+    let fixture = manifest_dir.join("tests/cli_responses_fixture.sse");
+    let codex_home = TempDir::new().unwrap();
+
+    let mut cli = AssertCommand::new("cargo");
+    cli.args([
+        "run",
+        "-p",
+        "codex-cli",
+        "--quiet",
+        "--",
+        "exec",
+        "--skip-git-repo-check",
+        "-C",
+        env!("CARGO_MANIFEST_DIR"),
+        "hello?",
+    ])
+    .env("CODEX_HOME", codex_home.path())
+    .env("OPENAI_API_KEY", "dummy")
+    .env("CODEX_RS_SSE_FIXTURE", &fixture)
+    .env("OPENAI_BASE_URL", "http://unused.local");
+
+    let output = cli.output().unwrap();
+    assert!(output.status.success());
+
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    assert!(stdout.contains("fixture hello"));
+}
+
+/// End-to-end: create a session (writes rollout), verify the file, then resume and confirm append.
+///
+/// Phases:
+/// 1. Run `codex-cli exec` with a unique marker in the prompt, against the offline SSE fixture.
+/// 2. Poll `$CODEX_HOME/sessions` for a `.jsonl` file whose `message` items contain the marker.
+/// 3. Check the file lives at `sessions/YYYY/MM/DD/<file>` and that its first line is JSON with
+///    `id` and `timestamp` fields.
+/// 4. Run the CLI again with `experimental_resume` pointing at that file and a second marker,
+///    then check the file grew and contains both markers.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn integration_creates_and_checks_session_file() {
+    // Honor sandbox network restrictions for CI parity with the other tests.
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // 1. Temp home so we read/write isolated session files.
+    let home = TempDir::new().unwrap();
+
+    // 2. Unique marker we'll look for in the session log.
+    let marker = format!("integration-test-{}", Uuid::new_v4());
+    let prompt = format!("echo {marker}");
+
+    // 3. Use the same offline SSE fixture as responses_api_stream_cli so the test is hermetic.
+    let fixture =
+        std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/cli_responses_fixture.sse");
+
+    // 4. Run the codex CLI through cargo (ensures the right bin is built) and invoke `exec`,
+    //    which is what records a session.
+    let mut cmd = AssertCommand::new("cargo");
+    cmd.arg("run")
+        .arg("-p")
+        .arg("codex-cli")
+        .arg("--quiet")
+        .arg("--")
+        .arg("exec")
+        .arg("--skip-git-repo-check")
+        .arg("-C")
+        .arg(env!("CARGO_MANIFEST_DIR"))
+        .arg(&prompt);
+    cmd.env("CODEX_HOME", home.path())
+        .env("OPENAI_API_KEY", "dummy")
+        .env("CODEX_RS_SSE_FIXTURE", &fixture)
+        // Required for CLI arg parsing even though fixture short-circuits network usage.
+        .env("OPENAI_BASE_URL", "http://unused.local");
+
+    let output = cmd.output().unwrap();
+    assert!(
+        output.status.success(),
+        "codex-cli exec failed: {}",
+        String::from_utf8_lossy(&output.stderr)
+    );
+
+    // Wait for sessions dir to appear.
+    // Polls every 50 ms for at most 5 seconds; the assert below reports a timeout.
+    let sessions_dir = home.path().join("sessions");
+    let dir_deadline = Instant::now() + Duration::from_secs(5);
+    while !sessions_dir.exists() && Instant::now() < dir_deadline {
+        std::thread::sleep(Duration::from_millis(50));
+    }
+    assert!(sessions_dir.exists(), "sessions directory never appeared");
+
+    // Find the session file that contains `marker`.
+    // The whole tree is rescanned every 50 ms for at most 10 seconds.
+    let deadline = Instant::now() + Duration::from_secs(10);
+    let mut matching_path: Option<std::path::PathBuf> = None;
+    while Instant::now() < deadline && matching_path.is_none() {
+        for entry in WalkDir::new(&sessions_dir) {
+            // Unreadable entries, non-files and non-`.jsonl` files are skipped silently.
+            let entry = match entry {
+                Ok(e) => e,
+                Err(_) => continue,
+            };
+            if !entry.file_type().is_file() {
+                continue;
+            }
+            if !entry.file_name().to_string_lossy().ends_with(".jsonl") {
+                continue;
+            }
+            let path = entry.path();
+            let Ok(content) = std::fs::read_to_string(path) else {
+                continue;
+            };
+            // Consume the first line without inspecting it here; it is parsed as the
+            // session meta line further down. An empty file is skipped.
+            let mut lines = content.lines();
+            if lines.next().is_none() {
+                continue;
+            }
+            for line in lines {
+                if line.trim().is_empty() {
+                    continue;
+                }
+                // Lines that are not valid JSON are ignored rather than failing the test.
+                let item: serde_json::Value = match serde_json::from_str(line) {
+                    Ok(v) => v,
+                    Err(_) => continue,
+                };
+                // The marker is searched in the serialized `content` value of `message` items.
+                if item.get("type").and_then(|t| t.as_str()) == Some("message")
+                    && let Some(c) = item.get("content")
+                    && c.to_string().contains(&marker)
+                {
+                    matching_path = Some(path.to_path_buf());
+                    // Only leaves the per-line loop; the directory walk continues with the
+                    // remaining entries before the outer `while` condition is re-checked.
+                    break;
+                }
+            }
+        }
+        if matching_path.is_none() {
+            std::thread::sleep(Duration::from_millis(50));
+        }
+    }
+
+    let path = match matching_path {
+        Some(p) => p,
+        None => panic!("No session file containing the marker was found"),
+    };
+
+    // Basic sanity checks on location and metadata.
+    let rel = match path.strip_prefix(&sessions_dir) {
+        Ok(r) => r,
+        Err(_) => panic!("session file should live under sessions/"),
+    };
+    let comps: Vec<String> = rel
+        .components()
+        .map(|c| c.as_os_str().to_string_lossy().into_owned())
+        .collect();
+    assert_eq!(
+        comps.len(),
+        4,
+        "Expected sessions/YYYY/MM/DD/<file>, got {rel:?}"
+    );
+    let year = &comps[0];
+    let month = &comps[1];
+    let day = &comps[2];
+    assert!(
+        year.len() == 4 && year.chars().all(|c| c.is_ascii_digit()),
+        "Year dir not 4-digit numeric: {year}"
+    );
+    assert!(
+        month.len() == 2 && month.chars().all(|c| c.is_ascii_digit()),
+        "Month dir not zero-padded 2-digit numeric: {month}"
+    );
+    assert!(
+        day.len() == 2 && day.chars().all(|c| c.is_ascii_digit()),
+        "Day dir not zero-padded 2-digit numeric: {day}"
+    );
+    // The asserts above guarantee two ASCII digits, so these parses succeed and the
+    // range checks always run.
+    if let Ok(m) = month.parse::<u8>() {
+        assert!((1..=12).contains(&m), "Month out of range: {m}");
+    }
+    if let Ok(d) = day.parse::<u8>() {
+        assert!((1..=31).contains(&d), "Day out of range: {d}");
+    }
+
+    // Re-read the matched file: the first line must be the session meta JSON and some later
+    // line must be a `message` item containing the marker.
+    let content =
+        std::fs::read_to_string(&path).unwrap_or_else(|_| panic!("Failed to read session file"));
+    let mut lines = content.lines();
+    let meta_line = lines
+        .next()
+        .ok_or("missing session meta line")
+        .unwrap_or_else(|_| panic!("missing session meta line"));
+    let meta: serde_json::Value = serde_json::from_str(meta_line)
+        .unwrap_or_else(|_| panic!("Failed to parse session meta line as JSON"));
+    assert!(meta.get("id").is_some(), "SessionMeta missing id");
+    assert!(
+        meta.get("timestamp").is_some(),
+        "SessionMeta missing timestamp"
+    );
+
+    let mut found_message = false;
+    for line in lines {
+        if line.trim().is_empty() {
+            continue;
+        }
+        let Ok(item) = serde_json::from_str::<serde_json::Value>(line) else {
+            continue;
+        };
+        if item.get("type").and_then(|t| t.as_str()) == Some("message")
+            && let Some(c) = item.get("content")
+            && c.to_string().contains(&marker)
+        {
+            found_message = true;
+            break;
+        }
+    }
+    assert!(
+        found_message,
+        "No message found in session file containing the marker"
+    );
+
+    // Second run: resume and append.
+    // `orig_len` counts every line of the file, including the meta line.
+    let orig_len = content.lines().count();
+    let marker2 = format!("integration-resume-{}", Uuid::new_v4());
+    let prompt2 = format!("echo {marker2}");
+    // Cross‑platform safe resume override.  On Windows, backslashes in a TOML string must be escaped
+    // or the parse will fail and the raw literal (including quotes) may be preserved all the way down
+    // to Config, which in turn breaks resume because the path is invalid. Normalize to forward slashes
+    // to sidestep the issue.
+    let resume_path_str = path.to_string_lossy().replace('\\', "/");
+    let resume_override = format!("experimental_resume=\"{resume_path_str}\"");
+    let mut cmd2 = AssertCommand::new("cargo");
+    cmd2.arg("run")
+        .arg("-p")
+        .arg("codex-cli")
+        .arg("--quiet")
+        .arg("--")
+        .arg("exec")
+        .arg("--skip-git-repo-check")
+        .arg("-c")
+        .arg(&resume_override)
+        .arg("-C")
+        .arg(env!("CARGO_MANIFEST_DIR"))
+        .arg(&prompt2);
+    cmd2.env("CODEX_HOME", home.path())
+        .env("OPENAI_API_KEY", "dummy")
+        .env("CODEX_RS_SSE_FIXTURE", &fixture)
+        .env("OPENAI_BASE_URL", "http://unused.local");
+
+    let output2 = cmd2.output().unwrap();
+    assert!(output2.status.success(), "resume codex-cli run failed");
+
+    // The rollout writer runs on a background async task; give it a moment to flush.
+    // Polls every 50 ms for at most 5 seconds until the file has more lines than before.
+    let mut new_len = orig_len;
+    let deadline = Instant::now() + Duration::from_secs(5);
+    let mut content2 = String::new();
+    while Instant::now() < deadline {
+        if let Ok(c) = std::fs::read_to_string(&path) {
+            let count = c.lines().count();
+            if count > orig_len {
+                content2 = c;
+                new_len = count;
+                break;
+            }
+        }
+        std::thread::sleep(Duration::from_millis(50));
+    }
+    // `content2` is still empty only when the loop above timed out without seeing growth.
+    if content2.is_empty() {
+        // last attempt
+        content2 = std::fs::read_to_string(&path).unwrap();
+        new_len = content2.lines().count();
+    }
+    assert!(new_len > orig_len, "rollout file did not grow after resume");
+    assert!(content2.contains(&marker), "rollout lost original marker");
+    assert!(
+        content2.contains(&marker2),
+        "rollout missing resumed marker"
+    );
+}
+
+/// Integration test to verify git info is collected and can be serialized.
+///
+/// Builds a throwaway git repository (one commit, a named branch and an
+/// `origin` remote), runs `collect_git_info` on it, checks the commit hash,
+/// branch and repository URL, and finally round-trips the result through JSON.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn integration_git_info_unit_test() {
+    // This test verifies git info collection works independently
+    // without depending on the full CLI integration
+
+    // 1. Create temp directory for git repo
+    let temp_dir = TempDir::new().unwrap();
+    let git_repo = temp_dir.path().to_path_buf();
+    // Environment passed to every git invocation below.
+    let envs = [
+        ("GIT_CONFIG_GLOBAL", "/dev/null"),
+        ("GIT_CONFIG_NOSYSTEM", "1"),
+    ];
+
+    // Runs `git <args>` inside the temp repo and fails the test right away,
+    // with git's stderr, if the command does not succeed. Checking every step
+    // keeps a broken setup from surfacing later as a misleading assertion.
+    let run_git = |args: &[&str]| {
+        let output = std::process::Command::new("git")
+            .envs(envs)
+            .args(args)
+            .current_dir(&git_repo)
+            .output()
+            .unwrap();
+        assert!(
+            output.status.success(),
+            "git {args:?} failed: {}",
+            String::from_utf8_lossy(&output.stderr)
+        );
+    };
+
+    // 2. Initialize a git repository with some content
+    run_git(&["init"]);
+
+    // Configure git user (required for commits)
+    run_git(&["config", "user.name", "Integration Test"]);
+    run_git(&["config", "user.email", "test@example.com"]);
+
+    // Create a test file and commit it
+    let test_file = git_repo.join("test.txt");
+    std::fs::write(&test_file, "integration test content").unwrap();
+    run_git(&["add", "."]);
+    run_git(&["commit", "-m", "Integration test commit"]);
+
+    // Create a branch to test branch detection
+    run_git(&["checkout", "-b", "integration-test-branch"]);
+
+    // Add a remote to test repository URL detection
+    run_git(&[
+        "remote",
+        "add",
+        "origin",
+        "https://github.com/example/integration-test.git",
+    ]);
+
+    // 3. Test git info collection directly
+    // 4. Verify git info is present and contains expected data
+    let git_info = codex_core::git_info::collect_git_info(&git_repo)
+        .await
+        .expect("Git info should be collected");
+
+    // Check that we have a commit hash
+    let commit_hash = git_info
+        .commit_hash
+        .as_ref()
+        .expect("Git info should contain commit_hash");
+    assert_eq!(commit_hash.len(), 40, "Commit hash should be 40 characters");
+    assert!(
+        commit_hash.chars().all(|c| c.is_ascii_hexdigit()),
+        "Commit hash should be hexadecimal"
+    );
+
+    // Check that we have the correct branch
+    let branch = git_info
+        .branch
+        .as_ref()
+        .expect("Git info should contain branch");
+    assert_eq!(
+        branch, "integration-test-branch",
+        "Branch should match what we created"
+    );
+
+    // Check that we have the repository URL
+    let repo_url = git_info
+        .repository_url
+        .as_ref()
+        .expect("Git info should contain repository_url");
+    assert_eq!(
+        repo_url, "https://github.com/example/integration-test.git",
+        "Repository URL should match what we configured"
+    );
+
+    println!("✅ Git info collection test passed!");
+    println!("   Commit: {commit_hash}");
+    println!("   Branch: {branch}");
+    println!("   Repo: {repo_url}");
+
+    // 5. Test serialization to ensure it works in SessionMeta
+    let serialized = serde_json::to_string(&git_info).unwrap();
+    let deserialized: codex_core::git_info::GitInfo = serde_json::from_str(&serialized).unwrap();
+
+    assert_eq!(git_info.commit_hash, deserialized.commit_hash);
+    assert_eq!(git_info.branch, deserialized.branch);
+    assert_eq!(git_info.repository_url, deserialized.repository_url);
+
+    println!("✅ Git info serialization test passed!");
+}
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -0,0 +1,748 @@
+use codex_core::ConversationManager;
+use codex_core::ModelProviderInfo;
+use codex_core::NewConversation;
+use codex_core::WireApi;
+use codex_core::built_in_model_providers;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_login::AuthMode;
+use codex_login::CodexAuth;
+use core_test_support::load_default_config_for_test;
+use core_test_support::load_sse_fixture_with_id;
+use core_test_support::wait_for_event;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::header_regex;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+use wiremock::matchers::query_param;
+
+/// Returns a minimal SSE stream ending in a "completed" marker, produced by
+/// loading the JSON template fixture with the given response `id`.
+fn sse_completed(id: &str) -> String {
+    const COMPLETED_TEMPLATE: &str = "tests/fixtures/completed_template.json";
+    load_sse_fixture_with_id(COMPLETED_TEMPLATE, id)
+}
+
+/// Asserts that the `role` entry of `request_body` equals `role`.
+/// Panics if that entry is not a JSON string.
+#[expect(clippy::unwrap_used)]
+fn assert_message_role(request_body: &serde_json::Value, role: &str) {
+    let actual_role = request_body["role"].as_str().unwrap();
+    assert_eq!(actual_role, role);
+}
+
+/// Asserts that the text of the first content item in `request_body`
+/// (`content[0].text`) begins with `text`.
+/// Panics if that entry is not a JSON string.
+#[expect(clippy::expect_used)]
+fn assert_message_starts_with(request_body: &serde_json::Value, text: &str) {
+    let first_text = &request_body["content"][0]["text"];
+    let content = first_text.as_str().expect("invalid message content");
+
+    let has_prefix = content.starts_with(text);
+    assert!(
+        has_prefix,
+        "expected message content '{content}' to start with '{text}'"
+    );
+}
+
+/// Asserts that the text of the first content item in `request_body`
+/// (`content[0].text`) finishes with `text`.
+/// Panics if that entry is not a JSON string.
+#[expect(clippy::expect_used)]
+fn assert_message_ends_with(request_body: &serde_json::Value, text: &str) {
+    let first_text = &request_body["content"][0]["text"];
+    let content = first_text.as_str().expect("invalid message content");
+
+    let has_suffix = content.ends_with(text);
+    assert!(
+        has_suffix,
+        "expected message content '{content}' to end with '{text}'"
+    );
+}
+
+/// Creates `auth.json` inside `codex_home` from the given parameters and
+/// returns the fake JWT that was stored under `tokens.id_token`.
+///
+/// The JWT payload always carries a `chatgpt_account_id`, falling back to
+/// `"acc-123"` when `account_id` is `None`; `tokens.account_id` is only
+/// written when `account_id` is `Some`.
+#[expect(clippy::unwrap_used)]
+fn write_auth_json(
+    codex_home: &TempDir,
+    openai_api_key: Option<&str>,
+    chatgpt_plan_type: &str,
+    access_token: &str,
+    account_id: Option<&str>,
+) -> String {
+    use base64::Engine as _;
+    use serde_json::json;
+
+    // Encodes one JWT segment as unpadded, URL-safe base64.
+    let encode_segment =
+        |bytes: &[u8]| base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(bytes);
+
+    let jwt_header = json!({ "alg": "none", "typ": "JWT" });
+    let jwt_payload = json!({
+        "email": "user@example.com",
+        "https://api.openai.com/auth": {
+            "chatgpt_plan_type": chatgpt_plan_type,
+            "chatgpt_account_id": account_id.unwrap_or("acc-123")
+        }
+    });
+
+    // header.payload.signature; the signature is a fixed placeholder.
+    let fake_jwt = format!(
+        "{}.{}.{}",
+        encode_segment(&serde_json::to_vec(&jwt_header).unwrap()),
+        encode_segment(&serde_json::to_vec(&jwt_payload).unwrap()),
+        encode_segment(b"sig")
+    );
+
+    let mut tokens = json!({
+        "id_token": fake_jwt,
+        "access_token": access_token,
+        "refresh_token": "refresh-test",
+    });
+    if let Some(acc) = account_id {
+        tokens["account_id"] = json!(acc);
+    }
+
+    let auth_json = json!({
+        "OPENAI_API_KEY": openai_api_key,
+        "tokens": tokens,
+        // RFC3339 datetime; value doesn't matter for these tests
+        "last_refresh": "2025-08-06T20:41:36.232376Z",
+    });
+
+    let auth_path = codex_home.path().join("auth.json");
+    let auth_contents = serde_json::to_string_pretty(&auth_json).unwrap();
+    std::fs::write(auth_path, auth_contents).unwrap();
+
+    fake_jwt
+}
+
+/// Submits one user message with API-key auth and checks the headers of the
+/// request received by the mock server: `session_id` must equal the
+/// conversation id, `originator` must be `codex_cli_rs`, and `authorization`
+/// must carry the API key as a bearer token.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn includes_session_id_and_model_headers_in_request() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Mock server that answers the single expected POST with a completed SSE stream.
+    let server = MockServer::start().await;
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(
+            ResponseTemplate::new(200)
+                .insert_header("content-type", "text/event-stream")
+                .set_body_raw(sse_completed("resp1"), "text/event-stream"),
+        )
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    // Config: built-in "openai" provider, redirected to the mock server.
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    // Init session
+    let auth = CodexAuth::from_api_key("Test API Key");
+    let conversation_manager = ConversationManager::with_auth(auth);
+    let NewConversation {
+        conversation: codex,
+        conversation_id,
+        session_configured: _,
+    } = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation");
+
+    let user_input = Op::UserInput {
+        items: vec![InputItem::Text {
+            text: "hello".into(),
+        }],
+    };
+    codex.submit(user_input).await.unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Inspect the first request captured by the server.
+    let requests = server.received_requests().await.unwrap();
+    let headers = &requests[0].headers;
+    let session_id_header = headers.get("session_id").unwrap();
+    let authorization_header = headers.get("authorization").unwrap();
+    let originator_header = headers.get("originator").unwrap();
+
+    assert_eq!(
+        session_id_header.to_str().unwrap(),
+        conversation_id.to_string()
+    );
+    assert_eq!(originator_header.to_str().unwrap(), "codex_cli_rs");
+    assert_eq!(
+        authorization_header.to_str().unwrap(),
+        "Bearer Test API Key"
+    );
+}
+
+/// Sets `config.base_instructions` and checks that the `instructions` field
+/// of the request body received by the mock server contains that text.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn includes_base_instructions_override_in_request() {
+    // This test talks to a local mock server, so skip it like the other
+    // network-dependent tests in this file when the sandbox disables network.
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Mock server
+    let server = MockServer::start().await;
+
+    // Single expected request, answered with a completed SSE stream.
+    let first = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(first)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+
+    config.base_instructions = Some("test instructions".to_string());
+    config.model_provider = model_provider;
+
+    let conversation_manager =
+        ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // The override must appear in the `instructions` field of the request body.
+    let request = &server.received_requests().await.unwrap()[0];
+    let request_body = request.body_json::<serde_json::Value>().unwrap();
+
+    assert!(
+        request_body["instructions"]
+            .as_str()
+            .unwrap()
+            .contains("test instructions")
+    );
+}
+
+/// Sets `config.responses_originator_header` and checks that the `originator`
+/// header of the request received by the mock server carries that value.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn originator_config_override_is_used() {
+    // This test talks to a local mock server, so skip it like the other
+    // network-dependent tests in this file when the sandbox disables network.
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Mock server
+    let server = MockServer::start().await;
+
+    // Single expected request, answered with a completed SSE stream.
+    let first = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(first)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = model_provider;
+    config.responses_originator_header = "my_override".to_owned();
+
+    let conversation_manager =
+        ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // The configured originator must be sent instead of the default one.
+    let request = &server.received_requests().await.unwrap()[0];
+    let request_originator = request.headers.get("originator").unwrap();
+    assert_eq!(request_originator.to_str().unwrap(), "my_override");
+}
+
+/// Verifies the request produced when the conversation uses the dummy ChatGPT
+/// auth: the `session_id`, `originator`, `authorization` and
+/// `chatgpt-account-id` headers, plus the `store`, `stream` and `include`
+/// fields of the JSON body.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn chatgpt_auth_sends_correct_request() {
+    let network_disabled = std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok();
+    if network_disabled {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Mock server that expects exactly one POST to `/api/codex/responses`.
+    let server = MockServer::start().await;
+    let sse_reply = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+    Mock::given(method("POST"))
+        .and(path("/api/codex/responses"))
+        .respond_with(sse_reply)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    // Built-in OpenAI provider, re-pointed at the mock server.
+    let provider = ModelProviderInfo {
+        base_url: Some(format!("{}/api/codex", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    // Start a session backed by the dummy ChatGPT credentials.
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = provider;
+    let manager = ConversationManager::with_auth(create_dummy_codex_auth());
+    let NewConversation {
+        conversation: codex,
+        conversation_id,
+        session_configured: _,
+    } = manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation");
+
+    let user_turn = Op::UserInput {
+        items: vec![InputItem::Text {
+            text: "hello".into(),
+        }],
+    };
+    codex.submit(user_turn).await.unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Pull the first recorded request and the pieces we want to check.
+    let recorded = server.received_requests().await.unwrap();
+    let request = &recorded[0];
+    let headers = &request.headers;
+    let session_id_header = headers.get("session_id").unwrap();
+    let authorization_header = headers.get("authorization").unwrap();
+    let originator_header = headers.get("originator").unwrap();
+    let account_id_header = headers.get("chatgpt-account-id").unwrap();
+    let body = request.body_json::<serde_json::Value>().unwrap();
+
+    // Headers: session id mirrors the conversation id; the rest are fixed values.
+    assert_eq!(
+        session_id_header.to_str().unwrap(),
+        conversation_id.to_string()
+    );
+    assert_eq!(originator_header.to_str().unwrap(), "codex_cli_rs");
+    assert_eq!(
+        authorization_header.to_str().unwrap(),
+        "Bearer Access Token"
+    );
+    assert_eq!(account_id_header.to_str().unwrap(), "account_id");
+
+    // Body flags: not stored, streamed, and encrypted reasoning requested.
+    assert!(!body["store"].as_bool().unwrap());
+    assert!(body["stream"].as_bool().unwrap());
+    assert_eq!(
+        body["include"][0].as_str().unwrap(),
+        "reasoning.encrypted_content"
+    );
+}
+
+/// With an `auth.json` holding both an API key and ChatGPT tokens, and
+/// `preferred_auth_method = AuthMode::ChatGPT`, the request must carry the
+/// ChatGPT access token and account id, and `store` must be `false`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn prefers_chatgpt_token_when_config_prefers_chatgpt() {
+    let network_disabled = std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok();
+    if network_disabled {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let server = MockServer::start().await;
+
+    let sse_reply = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    // The mock only matches when the ChatGPT token and account id headers are present.
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .and(header_regex("Authorization", r"Bearer Access-123"))
+        .and(header_regex("chatgpt-account-id", r"acc-123"))
+        .respond_with(sse_reply)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    // auth.json carries both an API key and ChatGPT tokens.
+    let codex_home = TempDir::new().unwrap();
+    let _jwt = write_auth_json(
+        &codex_home,
+        Some("sk-test-key"),
+        "pro",
+        "Access-123",
+        Some("acc-123"),
+    );
+
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = provider;
+    config.preferred_auth_method = AuthMode::ChatGPT;
+
+    // Load the credentials from disk, honouring the configured preference.
+    let loaded_auth = CodexAuth::from_codex_home(codex_home.path(), config.preferred_auth_method);
+    let auth_manager = match loaded_auth {
+        Ok(Some(auth)) => codex_login::AuthManager::from_auth_for_testing(auth),
+        Ok(None) => panic!("No CodexAuth found in codex_home"),
+        Err(e) => panic!("Failed to load CodexAuth: {}", e),
+    };
+    let codex = ConversationManager::new(auth_manager)
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    let user_turn = Op::UserInput {
+        items: vec![InputItem::Text {
+            text: "hello".into(),
+        }],
+    };
+    codex.submit(user_turn).await.unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Check the `store` flag on the first recorded request.
+    let recorded = server.received_requests().await.unwrap();
+    let body = recorded[0].body_json::<serde_json::Value>().unwrap();
+    assert!(
+        !body["store"].as_bool().unwrap(),
+        "store should be false for ChatGPT auth"
+    );
+}
+
+/// With an `auth.json` holding both an API key and ChatGPT tokens, and
+/// `preferred_auth_method = AuthMode::ApiKey`, the request must carry the API
+/// key as bearer token, and `store` must be `true`.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn prefers_apikey_when_config_prefers_apikey_even_with_chatgpt_tokens() {
+    let network_disabled = std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok();
+    if network_disabled {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let server = MockServer::start().await;
+
+    let sse_reply = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    // The mock only matches the API key bearer token; no account header is required.
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .and(header_regex("Authorization", r"Bearer sk-test-key"))
+        .respond_with(sse_reply)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    // auth.json carries both an API key and ChatGPT tokens; the config below
+    // forces the API key to be used.
+    let codex_home = TempDir::new().unwrap();
+    let _jwt = write_auth_json(
+        &codex_home,
+        Some("sk-test-key"),
+        "pro",
+        "Access-123",
+        Some("acc-123"),
+    );
+
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = provider;
+    config.preferred_auth_method = AuthMode::ApiKey;
+
+    // Load the credentials from disk, honouring the configured preference.
+    let loaded_auth = CodexAuth::from_codex_home(codex_home.path(), config.preferred_auth_method);
+    let auth_manager = match loaded_auth {
+        Ok(Some(auth)) => codex_login::AuthManager::from_auth_for_testing(auth),
+        Ok(None) => panic!("No CodexAuth found in codex_home"),
+        Err(e) => panic!("Failed to load CodexAuth: {}", e),
+    };
+    let codex = ConversationManager::new(auth_manager)
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    let user_turn = Op::UserInput {
+        items: vec![InputItem::Text {
+            text: "hello".into(),
+        }],
+    };
+    codex.submit(user_turn).await.unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Check the `store` flag on the first recorded request.
+    let recorded = server.received_requests().await.unwrap();
+    let body = recorded[0].body_json::<serde_json::Value>().unwrap();
+    assert!(
+        body["store"].as_bool().unwrap(),
+        "store should be true for API key auth"
+    );
+}
+
+/// Checks where `config.user_instructions` ends up in the request: it must
+/// not appear in the `instructions` field, and the first two `input` items
+/// must be user messages wrapped in `<user_instructions>` and
+/// `<environment_context>` tags respectively.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn includes_user_instructions_message_in_request() {
+    // Same guard as the other mock-server tests in this file: skip instead of
+    // failing when the Codex sandbox has networking disabled.
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let server = MockServer::start().await;
+
+    let first = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    // Exactly one POST to the responses endpoint is expected.
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(first)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    // Built-in OpenAI provider, re-pointed at the mock server.
+    let model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = model_provider;
+    // The user instructions under test.
+    config.user_instructions = Some("be nice".to_string());
+
+    let conversation_manager =
+        ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    let request = &server.received_requests().await.unwrap()[0];
+    let request_body = request.body_json::<serde_json::Value>().unwrap();
+
+    // User instructions must not leak into the system `instructions` field.
+    assert!(
+        !request_body["instructions"]
+            .as_str()
+            .unwrap()
+            .contains("be nice")
+    );
+    // First input item: the user instructions wrapper.
+    assert_message_role(&request_body["input"][0], "user");
+    assert_message_starts_with(&request_body["input"][0], "<user_instructions>");
+    assert_message_ends_with(&request_body["input"][0], "</user_instructions>");
+    // Second input item: the environment context wrapper.
+    assert_message_role(&request_body["input"][1], "user");
+    assert_message_starts_with(&request_body["input"][1], "<environment_context>");
+    assert_message_ends_with(&request_body["input"][1], "</environment_context>");
+}
+
+/// Checks that a custom provider's `base_url`, `query_params`, `http_headers`
+/// and `env_key` all shape the outgoing request: a POST to
+/// `/openai/responses` with the `api-version` query parameter, the custom
+/// header, and a bearer token read from the environment variable.
+///
+/// There are no explicit assertions after the turn completes; the mock is
+/// mounted with `.expect(1)` and only matches a request with those properties.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn azure_overrides_assign_properties_used_for_responses_url() {
+    // Same guard as the other mock-server tests in this file: skip instead of
+    // failing when the Codex sandbox has networking disabled.
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let existing_env_var_with_random_value = if cfg!(windows) { "USERNAME" } else { "USER" };
+    // Read the value once; it is the bearer token the provider should send.
+    let expected_api_key = std::env::var(existing_env_var_with_random_value).unwrap();
+
+    // Mock server
+    let server = MockServer::start().await;
+
+    let first = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    // Expect POST to /openai/responses with api-version query param.
+    // NOTE(review): the env value is interpolated into a regex unescaped, so a
+    // value containing regex metacharacters may not match as intended.
+    Mock::given(method("POST"))
+        .and(path("/openai/responses"))
+        .and(query_param("api-version", "2025-04-01-preview"))
+        .and(header_regex("Custom-Header", "Value"))
+        .and(header_regex(
+            "Authorization",
+            format!("Bearer {expected_api_key}").as_str(),
+        ))
+        .respond_with(first)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let provider = ModelProviderInfo {
+        name: "custom".to_string(),
+        base_url: Some(format!("{}/openai", server.uri())),
+        // Reuse the existing environment variable to avoid using unsafe code
+        env_key: Some(existing_env_var_with_random_value.to_string()),
+        query_params: Some(std::collections::HashMap::from([(
+            "api-version".to_string(),
+            "2025-04-01-preview".to_string(),
+        )])),
+        env_key_instructions: None,
+        wire_api: WireApi::Responses,
+        http_headers: Some(std::collections::HashMap::from([(
+            "Custom-Header".to_string(),
+            "Value".to_string(),
+        )])),
+        env_http_headers: None,
+        request_max_retries: None,
+        stream_max_retries: None,
+        stream_idle_timeout_ms: None,
+        requires_openai_auth: false,
+    };
+
+    // Init session
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = provider;
+
+    let conversation_manager = ConversationManager::with_auth(create_dummy_codex_auth());
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+}
+
+/// Checks that the provider's `env_key` value is sent as the bearer token even
+/// though the conversation manager was built with the dummy ChatGPT auth.
+///
+/// There are no explicit assertions after the turn completes; the mock is
+/// mounted with `.expect(1)` and only matches a request whose `Authorization`
+/// header carries the environment variable's value.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn env_var_overrides_loaded_auth() {
+    // Same guard as the other mock-server tests in this file: skip instead of
+    // failing when the Codex sandbox has networking disabled.
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let existing_env_var_with_random_value = if cfg!(windows) { "USERNAME" } else { "USER" };
+    // Read the value once; it is the bearer token the provider should send.
+    let expected_api_key = std::env::var(existing_env_var_with_random_value).unwrap();
+
+    // Mock server
+    let server = MockServer::start().await;
+
+    let first = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp1"), "text/event-stream");
+
+    // Expect POST to /openai/responses with api-version query param.
+    // NOTE(review): the env value is interpolated into a regex unescaped, so a
+    // value containing regex metacharacters may not match as intended.
+    Mock::given(method("POST"))
+        .and(path("/openai/responses"))
+        .and(query_param("api-version", "2025-04-01-preview"))
+        .and(header_regex("Custom-Header", "Value"))
+        .and(header_regex(
+            "Authorization",
+            format!("Bearer {expected_api_key}").as_str(),
+        ))
+        .respond_with(first)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    let provider = ModelProviderInfo {
+        name: "custom".to_string(),
+        base_url: Some(format!("{}/openai", server.uri())),
+        // Reuse the existing environment variable to avoid using unsafe code
+        env_key: Some(existing_env_var_with_random_value.to_string()),
+        query_params: Some(std::collections::HashMap::from([(
+            "api-version".to_string(),
+            "2025-04-01-preview".to_string(),
+        )])),
+        env_key_instructions: None,
+        wire_api: WireApi::Responses,
+        http_headers: Some(std::collections::HashMap::from([(
+            "Custom-Header".to_string(),
+            "Value".to_string(),
+        )])),
+        env_http_headers: None,
+        request_max_retries: None,
+        stream_max_retries: None,
+        stream_idle_timeout_ms: None,
+        requires_openai_auth: false,
+    };
+
+    // Init session
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = provider;
+
+    // The loaded auth is the dummy ChatGPT auth; the env key is expected to win.
+    let conversation_manager = ConversationManager::with_auth(create_dummy_codex_auth());
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+}
+
+/// Shorthand for `CodexAuth::create_dummy_chatgpt_auth_for_testing()`, the
+/// dummy ChatGPT-style credentials used by the tests in this file.
+fn create_dummy_codex_auth() -> CodexAuth {
+    CodexAuth::create_dummy_chatgpt_auth_for_testing()
+}
--- a/codex-rs/core/tests/suite/compact.rs
+++ b/codex-rs/core/tests/suite/compact.rs
@@ -0,0 +1,251 @@
+#![expect(clippy::unwrap_used)]
+
+use codex_core::ConversationManager;
+use codex_core::ModelProviderInfo;
+use codex_core::built_in_model_providers;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_login::CodexAuth;
+use core_test_support::load_default_config_for_test;
+use core_test_support::wait_for_event;
+use serde_json::Value;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+use pretty_assertions::assert_eq;
+
+// --- Test helpers -----------------------------------------------------------
+
+/// Render a list of JSON events as a server-sent-events body.
+///
+/// Each event contributes an `event: <type>` line. Events that carry anything
+/// beyond the `type` field also get a `data: <json>` line. Every event is
+/// terminated by a blank line. Panics if an event has no string `type` field.
+fn sse(events: Vec<Value>) -> String {
+    use std::fmt::Write as _;
+
+    let mut body = String::new();
+    for event in events {
+        let kind = event["type"].as_str().unwrap();
+        // An object whose only key is `type` has no payload worth sending.
+        let type_only = event
+            .as_object()
+            .is_some_and(|fields| fields.len() == 1);
+
+        writeln!(&mut body, "event: {kind}").unwrap();
+        if type_only {
+            body.push('\n');
+        } else {
+            write!(&mut body, "data: {event}\n\n").unwrap();
+        }
+    }
+    body
+}
+
+/// Build a `response.completed` event for the response with the given `id`,
+/// reporting all-zero token usage.
+fn ev_completed(id: &str) -> Value {
+    let usage = serde_json::json!({
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+    });
+    serde_json::json!({
+        "type": "response.completed",
+        "response": {
+            "id": id,
+            "usage": usage
+        }
+    })
+}
+
+/// Build a `response.output_item.done` event holding one assistant message
+/// with the given `id` and a single `output_text` content part.
+fn ev_assistant_message(id: &str, text: &str) -> Value {
+    let content = serde_json::json!([{"type": "output_text", "text": text}]);
+    serde_json::json!({
+        "type": "response.output_item.done",
+        "item": {
+            "type": "message",
+            "role": "assistant",
+            "id": id,
+            "content": content
+        }
+    })
+}
+
+/// Wrap an SSE body in a 200 response with the `text/event-stream` content type.
+fn sse_response(body: String) -> ResponseTemplate {
+    const EVENT_STREAM: &str = "text/event-stream";
+
+    let template = ResponseTemplate::new(200).insert_header("content-type", EVENT_STREAM);
+    template.set_body_raw(body, EVENT_STREAM)
+}
+
+/// Mount a mock on `server` that answers a POST to `/v1/responses` satisfying
+/// `matcher` with the SSE `body`, and expects to be hit exactly once.
+async fn mount_sse_once<M>(server: &MockServer, matcher: M, body: String)
+where
+    M: wiremock::Match + Send + Sync + 'static,
+{
+    let reply = sse_response(body);
+    let mock = Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .and(matcher)
+        .respond_with(reply)
+        .expect(1);
+    mock.mount(server).await;
+}
+
+// Assistant text returned by the mock for the first (normal) turn.
+const FIRST_REPLY: &str = "FIRST_REPLY";
+// Assistant text returned by the mock for the summarization turn.
+const SUMMARY_TEXT: &str = "SUMMARY_ONLY_CONTEXT";
+// Text the test expects to find in the user message of the summarization request.
+const SUMMARIZE_TRIGGER: &str = "Start Summarization";
+// User text submitted on the third turn, after compaction.
+const THIRD_USER_MSG: &str = "next turn";
+
+/// Drives three turns against a mock server (a user input, `Op::Compact`,
+/// then another user input) and inspects the three recorded requests:
+///
+/// * the summarization request uses different `instructions` and ends with a
+///   user message containing `SUMMARIZE_TRIGGER`;
+/// * the third request holds exactly one assistant message and the new user
+///   message, and no longer mentions the original input or the trigger.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn summarize_context_three_requests_and_instructions() {
+    /// True when the raw request body contains the JSON pair `"text":"<text>"`.
+    fn body_has_text(req: &wiremock::Request, text: &str) -> bool {
+        let body = std::str::from_utf8(&req.body).unwrap_or("");
+        body.contains(&format!("\"text\":\"{text}\""))
+    }
+
+    let network_disabled = std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok();
+    if network_disabled {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Mock server whose recorded requests are inspected at the end.
+    let server = MockServer::start().await;
+
+    // Turn 1: a normal assistant reply, so it lands in the history.
+    let first_stream = sse(vec![
+        ev_assistant_message("m1", FIRST_REPLY),
+        ev_completed("r1"),
+    ]);
+    // Turn 2: the summarizer answers with the summary text.
+    let summary_stream = sse(vec![
+        ev_assistant_message("m2", SUMMARY_TEXT),
+        ev_completed("r2"),
+    ]);
+    // Turn 3: only a completion; the request body is what matters.
+    let third_stream = sse(vec![ev_completed("r3")]);
+
+    // One mock per turn, selected by what the request body contains.
+    mount_sse_once(
+        &server,
+        |req: &wiremock::Request| {
+            body_has_text(req, "hello world") && !body_has_text(req, SUMMARIZE_TRIGGER)
+        },
+        first_stream,
+    )
+    .await;
+    mount_sse_once(
+        &server,
+        |req: &wiremock::Request| body_has_text(req, SUMMARIZE_TRIGGER),
+        summary_stream,
+    )
+    .await;
+    mount_sse_once(
+        &server,
+        |req: &wiremock::Request| body_has_text(req, THIRD_USER_MSG),
+        third_stream,
+    )
+    .await;
+
+    // Point the built-in OpenAI provider at the mock and start a conversation.
+    let provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    let home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&home);
+    config.model_provider = provider;
+    let manager = ConversationManager::with_auth(CodexAuth::from_api_key("dummy"));
+    let codex = manager.new_conversation(config).await.unwrap().conversation;
+
+    // 1) Plain user input: first request.
+    let first_turn = Op::UserInput {
+        items: vec![InputItem::Text {
+            text: "hello world".into(),
+        }],
+    };
+    codex.submit(first_turn).await.unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // 2) Compaction: second request, with summarization instructions.
+    codex.submit(Op::Compact).await.unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // 3) Follow-up user input: third request, built on the compacted history.
+    let third_turn = Op::UserInput {
+        items: vec![InputItem::Text {
+            text: THIRD_USER_MSG.into(),
+        }],
+    };
+    codex.submit(third_turn).await.unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Decode the three recorded request bodies.
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(requests.len(), 3, "expected exactly three requests");
+
+    let body1 = requests[0].body_json::<serde_json::Value>().unwrap();
+    let body2 = requests[1].body_json::<serde_json::Value>().unwrap();
+    let body3 = requests[2].body_json::<serde_json::Value>().unwrap();
+
+    // The summarization turn must swap in its own system instructions.
+    let base_instructions = body1["instructions"].as_str().unwrap();
+    let summary_instructions = body2["instructions"].as_str().unwrap();
+    assert_ne!(
+        base_instructions, summary_instructions,
+        "summarization should override base instructions"
+    );
+    assert!(
+        summary_instructions.contains("You are a summarization assistant"),
+        "summarization instructions not applied"
+    );
+
+    // The summarization request ends with the injected user message.
+    let summary_input = body2["input"].as_array().unwrap();
+    let injected = summary_input.last().unwrap();
+    assert_eq!(injected["type"].as_str().unwrap(), "message");
+    assert_eq!(injected["role"].as_str().unwrap(), "user");
+    let injected_text = injected["content"][0]["text"].as_str().unwrap();
+    assert!(injected_text.contains(SUMMARIZE_TRIGGER));
+
+    // The third request carries the summary as prior history plus the new user message.
+    let third_input = body3["input"].as_array().unwrap();
+    println!("third request body: {body3}");
+    assert!(
+        third_input.len() >= 2,
+        "expected summary + new user message in third request"
+    );
+
+    // Flatten every message item into a (role, first text part) pair.
+    let messages: Vec<(String, String)> = third_input
+        .iter()
+        .filter(|item| item["type"].as_str() == Some("message"))
+        .map(|item| {
+            let role = item["role"].as_str().unwrap_or_default().to_string();
+            let text = item["content"][0]["text"]
+                .as_str()
+                .unwrap_or_default()
+                .to_string();
+            (role, text)
+        })
+        .collect();
+
+    // Only one assistant message may survive compaction.
+    let assistant_count = messages
+        .iter()
+        .filter(|(role, _)| role == "assistant")
+        .count();
+    assert_eq!(
+        assistant_count, 1,
+        "exactly one assistant message should remain after compaction"
+    );
+    assert!(
+        messages
+            .iter()
+            .any(|(role, text)| role == "user" && text == THIRD_USER_MSG),
+        "third request should include the new user message"
+    );
+    assert!(
+        !messages.iter().any(|(_, text)| text.contains("hello world")),
+        "third request should not include the original user input"
+    );
+    assert!(
+        !messages
+            .iter()
+            .any(|(_, text)| text.contains(SUMMARIZE_TRIGGER)),
+        "third request should not include the summarize trigger"
+    );
+}
--- a/codex-rs/core/tests/suite/exec.rs
+++ b/codex-rs/core/tests/suite/exec.rs
@@ -0,0 +1,125 @@
+#![cfg(target_os = "macos")]
+
+use std::collections::HashMap;
+
+use codex_core::exec::ExecParams;
+use codex_core::exec::ExecToolCallOutput;
+use codex_core::exec::SandboxType;
+use codex_core::exec::process_exec_tool_call;
+use codex_core::protocol::SandboxPolicy;
+use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
+use tempfile::TempDir;
+
+use codex_core::error::Result;
+
+use codex_core::get_platform_sandbox;
+
+/// Returns `true`, after logging to stderr, when `CODEX_SANDBOX_ENV_VAR` is
+/// set to `seatbelt`; the tests in this file use it to bail out early.
+fn skip_test() -> bool {
+    let under_seatbelt = matches!(
+        std::env::var(CODEX_SANDBOX_ENV_VAR).as_deref(),
+        Ok("seatbelt")
+    );
+    if under_seatbelt {
+        eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
+    }
+    under_seatbelt
+}
+
+/// Runs `cmd` with `tmp` as working directory through `process_exec_tool_call`,
+/// using the platform sandbox and a read-only sandbox policy.
+///
+/// Asserts that the platform sandbox is `SandboxType::MacosSeatbelt`. The
+/// command gets an empty environment and a 1000 ms timeout.
+#[expect(clippy::expect_used)]
+async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput> {
+    let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
+    assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
+
+    let policy = SandboxPolicy::new_read_only_policy();
+    let params = ExecParams {
+        command: cmd.into_iter().map(str::to_owned).collect(),
+        cwd: tmp.path().to_path_buf(),
+        timeout_ms: Some(1000),
+        env: HashMap::new(),
+        with_escalated_permissions: None,
+        justification: None,
+    };
+
+    process_exec_tool_call(params, sandbox_type, &policy, &None, None).await
+}
+
+/// A command that exits with code 0 succeeds: `echo hello` yields its stdout,
+/// an empty stderr, and no truncation marker.
+#[tokio::test]
+async fn exit_code_0_succeeds() {
+    if skip_test() {
+        return;
+    }
+
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+
+    let output = run_test_cmd(tmp, vec!["echo", "hello"]).await.unwrap();
+    let stdout = &output.stdout;
+    assert_eq!(stdout.text, "hello\n");
+    assert_eq!(output.stderr.text, "");
+    assert_eq!(stdout.truncated_after_lines, None);
+}
+
+/// A command printing 300 lines (`seq 300`) has its stdout captured in full,
+/// with no truncation marker set.
+#[tokio::test]
+async fn truncates_output_lines() {
+    if skip_test() {
+        return;
+    }
+
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+
+    let output = run_test_cmd(tmp, vec!["seq", "300"]).await.unwrap();
+
+    // "1\n2\n...300\n"
+    let expected_output: String = (1..=300).map(|i| format!("{i}\n")).collect();
+    assert_eq!(output.stdout.text, expected_output);
+    assert_eq!(output.stdout.truncated_after_lines, None);
+}
+
+/// A command printing at least 15000 bytes has its stdout captured without a
+/// truncation marker being set.
+#[tokio::test]
+async fn truncates_output_bytes() {
+    if skip_test() {
+        return;
+    }
+
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    // 15 lines, each left-justified and padded to 1000 characters plus a newline.
+    let script = "seq 15 | awk '{printf \"%-1000s\\n\", $0}'";
+
+    let output = run_test_cmd(tmp, vec!["bash", "-lc", script])
+        .await
+        .unwrap();
+
+    assert!(output.stdout.text.len() >= 15000);
+    assert_eq!(output.stdout.truncated_after_lines, None);
+}
+
+/// Running a command that does not exist still yields `Ok` from the exec
+/// call, i.e. it is not reported as a sandbox error.
+#[tokio::test]
+async fn exit_command_not_found_is_ok() {
+    if skip_test() {
+        return;
+    }
+
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let missing_command = vec!["/bin/bash", "-c", "nonexistent_command_12345"];
+
+    // Only the `Ok` result matters; the output itself is not inspected.
+    let result = run_test_cmd(tmp, missing_command).await;
+    result.unwrap();
+}
+
+/// Writing a file fails and should be considered a sandbox error
+#[tokio::test]
+async fn write_file_fails_as_sandbox_error() {
+    if skip_test() {
+        return;
+    }
+
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let path = tmp.path().join("test.txt");
+    let cmd = vec![
+        // Use the real `touch` binary so the failure comes from the read-only
+        // sandbox policy denying the write, not from a missing executable.
+        "/usr/bin/touch",
+        path.to_str().expect("should be able to get path"),
+    ];
+
+    assert!(run_test_cmd(tmp, cmd).await.is_err());
+}
--- a/codex-rs/core/tests/suite/exec_stream_events.rs
+++ b/codex-rs/core/tests/suite/exec_stream_events.rs
@@ -0,0 +1,172 @@
+#![cfg(unix)]
+
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+use async_channel::Receiver;
+use codex_core::exec::ExecParams;
+use codex_core::exec::SandboxType;
+use codex_core::exec::StdoutStream;
+use codex_core::exec::process_exec_tool_call;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::ExecCommandOutputDeltaEvent;
+use codex_core::protocol::ExecOutputStream;
+use codex_core::protocol::SandboxPolicy;
+
+/// Drains every event that is already queued on `rx` (stopping at the first failed
+/// `try_recv`) and returns the concatenated chunks of the stdout output-delta events.
+/// Events of any other kind, including stderr deltas, are discarded.
+fn collect_stdout_events(rx: Receiver<Event>) -> Vec<u8> {
+    let mut stdout_bytes = Vec::new();
+    for event in std::iter::from_fn(|| rx.try_recv().ok()) {
+        let EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
+            stream: ExecOutputStream::Stdout,
+            chunk,
+            ..
+        }) = event.msg
+        else {
+            continue;
+        };
+        stdout_bytes.extend_from_slice(&chunk);
+    }
+    stdout_bytes
+}
+
+/// Runs a `printf` of `hello-world` without a sandbox and with a `StdoutStream` attached, then
+/// checks that both the final stdout text and the concatenated streamed stdout deltas equal
+/// the printed line.
+#[tokio::test]
+async fn test_exec_stdout_stream_events_echo() {
+    let (event_tx, event_rx) = async_channel::unbounded::<Event>();
+
+    let params = ExecParams {
+        command: vec![
+            "/bin/sh".to_string(),
+            "-c".to_string(),
+            // Use printf for predictable behavior across shells
+            "printf 'hello-world\n'".to_string(),
+        ],
+        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+        timeout_ms: Some(5_000),
+        env: HashMap::new(),
+        with_escalated_permissions: None,
+        justification: None,
+    };
+    let stream = StdoutStream {
+        sub_id: "test-sub".to_string(),
+        call_id: "call-1".to_string(),
+        tx_event: event_tx,
+    };
+    let policy = SandboxPolicy::new_read_only_policy();
+
+    let output = process_exec_tool_call(params, SandboxType::None, &policy, &None, Some(stream))
+        .await
+        .unwrap_or_else(|e| panic!("process_exec_tool_call failed: {e}"));
+
+    assert_eq!(output.exit_code, 0);
+    assert_eq!(output.stdout.text, "hello-world\n");
+
+    // The streamed deltas, concatenated, must reproduce stdout (possibly as a single chunk).
+    let streamed = collect_stdout_events(event_rx);
+    assert_eq!(String::from_utf8_lossy(&streamed), "hello-world\n");
+}
+
+/// Runs a `printf` of `oops` redirected to stderr, without a sandbox and with a `StdoutStream`
+/// attached, then checks that stdout stays empty while both the final stderr text and the
+/// concatenated streamed stderr deltas equal the printed line.
+#[tokio::test]
+async fn test_exec_stderr_stream_events_echo() {
+    let (event_tx, event_rx) = async_channel::unbounded::<Event>();
+
+    let params = ExecParams {
+        command: vec![
+            "/bin/sh".to_string(),
+            "-c".to_string(),
+            // Write to stderr explicitly
+            "printf 'oops\n' 1>&2".to_string(),
+        ],
+        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+        timeout_ms: Some(5_000),
+        env: HashMap::new(),
+        with_escalated_permissions: None,
+        justification: None,
+    };
+    let stream = StdoutStream {
+        sub_id: "test-sub".to_string(),
+        call_id: "call-2".to_string(),
+        tx_event: event_tx,
+    };
+    let policy = SandboxPolicy::new_read_only_policy();
+
+    let output = process_exec_tool_call(params, SandboxType::None, &policy, &None, Some(stream))
+        .await
+        .unwrap_or_else(|e| panic!("process_exec_tool_call failed: {e}"));
+
+    assert_eq!(output.exit_code, 0);
+    assert_eq!(output.stdout.text, "");
+    assert_eq!(output.stderr.text, "oops\n");
+
+    // Keep only the stderr deltas from whatever is already queued on the channel.
+    let mut streamed_stderr = Vec::new();
+    for event in std::iter::from_fn(|| event_rx.try_recv().ok()) {
+        let EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
+            stream: ExecOutputStream::Stderr,
+            chunk,
+            ..
+        }) = event.msg
+        else {
+            continue;
+        };
+        streamed_stderr.extend_from_slice(&chunk);
+    }
+    assert_eq!(String::from_utf8_lossy(&streamed_stderr), "oops\n");
+}
+
+/// Runs a script that writes O1, E1, O2, E2 alternately to stdout and stderr, then checks the
+/// text of each stream and that the aggregated output preserves the order of the writes.
+#[tokio::test]
+async fn test_aggregated_output_interleaves_in_order() {
+    // Alternates stdout and stderr writes, with short sleeps in between to enforce order.
+    const SCRIPT: &str = "printf 'O1\\n'; sleep 0.01; printf 'E1\\n' 1>&2; sleep 0.01; printf 'O2\\n'; sleep 0.01; printf 'E2\\n' 1>&2";
+
+    let params = ExecParams {
+        command: Vec::from(["/bin/sh", "-c", SCRIPT].map(String::from)),
+        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+        timeout_ms: Some(5_000),
+        env: HashMap::new(),
+        with_escalated_permissions: None,
+        justification: None,
+    };
+    let policy = SandboxPolicy::new_read_only_policy();
+
+    let output = process_exec_tool_call(params, SandboxType::None, &policy, &None, None)
+        .await
+        .expect("process_exec_tool_call");
+
+    assert_eq!(output.exit_code, 0);
+    assert_eq!(output.stdout.text, "O1\nO2\n");
+    assert_eq!(output.stderr.text, "E1\nE2\n");
+
+    let aggregated = &output.aggregated_output;
+    assert_eq!(aggregated.text, "O1\nE1\nO2\nE2\n");
+    assert_eq!(aggregated.truncated_after_lines, None);
+}
--- a/codex-rs/core/tests/suite/live_cli.rs
+++ b/codex-rs/core/tests/suite/live_cli.rs
@@ -0,0 +1,148 @@
+#![expect(clippy::expect_used)]
+
+//! Optional smoke tests that hit the real OpenAI /v1/responses endpoint. They are `#[ignore]` by
+//! default so CI stays deterministic and free. Developers can run them locally with
+//! `cargo test -p codex-core live_cli -- --ignored` provided they set a valid `OPENAI_API_KEY`.
+
+use assert_cmd::prelude::*;
+use predicates::prelude::*;
+use std::process::Command;
+use std::process::Stdio;
+use tempfile::TempDir;
+
+/// Returns the value of the `OPENAI_API_KEY` environment variable, panicking with an
+/// explanatory message when it cannot be read.
+fn require_api_key() -> String {
+    match std::env::var("OPENAI_API_KEY") {
+        Ok(key) => key,
+        Err(err) => panic!("OPENAI_API_KEY env var not set — skip running live tests: {err:?}"),
+    }
+}
+
+/// Runs the `codex-rs` binary on `prompt` with a fresh `TempDir` as its working directory.
+///
+/// The child's stdout and stderr are echoed to this process's own streams while it runs and
+/// are also captured, so the returned `Assert` can be checked as usual. Returns
+/// (Assert, TempDir); the `TempDir` is handed back so the caller can inspect the directory.
+fn run_live(prompt: &str) -> (assert_cmd::assert::Assert, TempDir) {
+    #![expect(clippy::unwrap_used)]
+    use std::io::Read;
+    use std::io::Write;
+    use std::thread;
+
+    /// Spawns a thread that copies everything read from `source` into `mirror` as it arrives
+    /// and returns the same bytes once `source` reaches EOF or a read fails.
+    fn tee<R: Read + Send + 'static>(
+        mut source: R,
+        mut mirror: impl Write + Send + 'static,
+    ) -> thread::JoinHandle<Vec<u8>> {
+        thread::spawn(move || {
+            let mut captured = Vec::new();
+            let mut scratch = [0u8; 4096];
+            // A zero-length read is EOF; a read error ends the copy the same way.
+            while let Ok(len) = source.read(&mut scratch) {
+                if len == 0 {
+                    break;
+                }
+                let fresh = &scratch[..len];
+                // Mirroring is best effort: a failed write must not lose the captured bytes.
+                mirror.write_all(fresh).ok();
+                mirror.flush().ok();
+                captured.extend_from_slice(fresh);
+            }
+            captured
+        })
+    }
+
+    let workdir = TempDir::new().unwrap();
+
+    // A plain `std::process::Command` is used on purpose: `assert_cmd`’s own `Command` wrapper
+    // always pipes stdout/stderr internally, which would prevent streaming them live to the
+    // terminal. The std `Command` is configured here and the resulting `Output` is handed to
+    // `assert_cmd` afterwards for the familiar assertions.
+    //
+    // All three stdio handles are piped so that:
+    //   • stdin can receive the terminating newline below
+    //   • stdout and stderr can each be read by a `tee` thread that forwards the bytes to the
+    //     parent’s stream for live visibility and keeps a copy in memory for `assert_cmd`
+    let mut codex = Command::cargo_bin("codex-rs").unwrap();
+    codex
+        .current_dir(workdir.path())
+        .env("OPENAI_API_KEY", require_api_key())
+        .arg("--allow-no-git-exec")
+        .arg("-v")
+        // The `--` separator tells the CLI where flags end and the user prompt begins.
+        .arg("--")
+        .arg(prompt)
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped());
+
+    let mut child = codex.spawn().expect("failed to spawn codex-rs");
+
+    // Send the terminating newline so Session::run exits after the first turn.
+    let child_stdin = child.stdin.as_mut().expect("child stdin unavailable");
+    child_stdin
+        .write_all(b"\n")
+        .expect("failed to write to child stdin");
+
+    let stdout_tee = tee(
+        child.stdout.take().expect("child stdout"),
+        std::io::stdout(),
+    );
+    let stderr_tee = tee(
+        child.stderr.take().expect("child stderr"),
+        std::io::stderr(),
+    );
+
+    // Wait for the child first, then collect what each tee thread captured.
+    let status = child.wait().expect("failed to wait on child");
+    let captured = std::process::Output {
+        status,
+        stdout: stdout_tee.join().expect("stdout thread panicked"),
+        stderr: stderr_tee.join().expect("stderr thread panicked"),
+    };
+
+    (captured.assert(), workdir)
+}
+
+/// Asks the live model to create `hello.txt` via `apply_patch`, then checks that the run
+/// succeeded and that the file exists in the working directory with the content `hello`.
+/// Ignored by default; returns early when `OPENAI_API_KEY` is not set.
+#[ignore]
+#[test]
+fn live_create_file_hello_txt() {
+    const PROMPT: &str = "Use the shell tool with the apply_patch command to create a file named hello.txt containing the text 'hello'.";
+
+    if std::env::var("OPENAI_API_KEY").is_err() {
+        eprintln!("skipping live_create_file_hello_txt – OPENAI_API_KEY not set");
+        return;
+    }
+
+    let (outcome, workdir) = run_live(PROMPT);
+    outcome.success();
+
+    let hello_path = workdir.path().join("hello.txt");
+    assert!(hello_path.exists(), "hello.txt was not created by the model");
+
+    // Surrounding whitespace (such as a trailing newline) is ignored in the comparison.
+    let written = std::fs::read_to_string(hello_path).unwrap();
+    assert_eq!(written.trim(), "hello");
+}
+
+/// Asks the live model to print the working directory and checks that the run succeeded and
+/// that stdout contains the path of the temporary directory the binary was started in.
+/// Ignored by default; returns early when `OPENAI_API_KEY` is not set.
+#[ignore]
+#[test]
+fn live_print_working_directory() {
+    if std::env::var("OPENAI_API_KEY").is_err() {
+        eprintln!("skipping live_print_working_directory – OPENAI_API_KEY not set");
+        return;
+    }
+
+    let (outcome, workdir) =
+        run_live("Print the current working directory using the shell function.");
+
+    let cwd_in_stdout = predicate::str::contains(workdir.path().to_string_lossy());
+    outcome.success().stdout(cwd_in_stdout);
+}
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -0,0 +1,12 @@
+// Aggregates all former standalone integration tests as modules.
+
+mod cli_stream;
+mod client;
+mod compact;
+mod exec;
+mod exec_stream_events;
+mod live_cli;
+mod prompt_caching;
+mod seatbelt;
+mod stream_error_allows_next_turn;
+mod stream_no_completed;
--- a/codex-rs/core/tests/suite/prompt_caching.rs
+++ b/codex-rs/core/tests/suite/prompt_caching.rs
@@ -0,0 +1,558 @@
+#![allow(clippy::unwrap_used)]
+
+use codex_core::ConversationManager;
+use codex_core::ModelProviderInfo;
+use codex_core::built_in_model_providers;
+use codex_core::model_family::find_family_for_model;
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::protocol::SandboxPolicy;
+use codex_core::protocol_config_types::ReasoningEffort;
+use codex_core::protocol_config_types::ReasoningSummary;
+use codex_core::shell::default_user_shell;
+use codex_login::CodexAuth;
+use core_test_support::load_default_config_for_test;
+use core_test_support::load_sse_fixture_with_id;
+use core_test_support::wait_for_event;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+/// Loads the `completed_template.json` fixture through `load_sse_fixture_with_id`, passing
+/// `id` along, and returns the resulting SSE body text.
+fn sse_completed(id: &str) -> String {
+    const COMPLETED_FIXTURE: &str = "tests/fixtures/completed_template.json";
+    load_sse_fixture_with_id(COMPLETED_FIXTURE, id)
+}
+
+/// Asserts that the `name` fields of the entries in `body["tools"]` equal `expected_names`,
+/// in the same order. Panics if `tools` is not an array or a `name` is not a string.
+fn assert_tool_names(body: &serde_json::Value, expected_names: &[&str]) {
+    let tools = body["tools"].as_array().unwrap();
+    let actual_names: Vec<String> = tools
+        .iter()
+        .map(|tool| tool["name"].as_str().unwrap().to_string())
+        .collect();
+    assert_eq!(actual_names, expected_names);
+}
+
+/// Sends two user turns with the `codex-mini-latest` model to a mock `/v1/responses` endpoint
+/// and checks that the `instructions` of both requests are the base prompt followed by the
+/// apply_patch tool instructions.
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn codex_mini_latest_tools() {
+    use pretty_assertions::assert_eq;
+
+    let server = MockServer::start().await;
+
+    // Every POST to /v1/responses gets the same completed SSE stream; exactly two are expected.
+    let completed_stream = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp"), "text/event-stream");
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(completed_stream)
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    let cwd = TempDir::new().unwrap();
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+    // Point the built-in OpenAI provider at the mock server.
+    config.model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    config.user_instructions = Some("be consistent and helpful".to_string());
+    config.include_apply_patch_tool = false;
+    config.model = "codex-mini-latest".to_string();
+    config.model_family = find_family_for_model("codex-mini-latest").unwrap();
+
+    let manager = ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    // Two consecutive turns, each awaited until its task completes.
+    for text in ["hello 1", "hello 2"] {
+        codex
+            .submit(Op::UserInput {
+                items: vec![InputItem::Text { text: text.into() }],
+            })
+            .await
+            .unwrap();
+        wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+    }
+
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(requests.len(), 2, "expected two POST requests");
+
+    let expected_instructions = format!(
+        "{}\n{}",
+        include_str!("../../prompt.md"),
+        include_str!("../../../apply-patch/apply_patch_tool_instructions.md"),
+    );
+
+    for request in &requests {
+        let body = request.body_json::<serde_json::Value>().unwrap();
+        assert_eq!(
+            body["instructions"],
+            serde_json::json!(expected_instructions),
+        );
+    }
+}
+
+/// Sends two user turns with the apply_patch and plan tools enabled and checks that both
+/// requests carry the base prompt as `instructions` and the same list of tool names.
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
+async fn prompt_tools_are_consistent_across_requests() {
+    use pretty_assertions::assert_eq;
+
+    let server = MockServer::start().await;
+
+    // Every POST to /v1/responses gets the same completed SSE stream; exactly two are expected.
+    let completed_stream = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp"), "text/event-stream");
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(completed_stream)
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    let cwd = TempDir::new().unwrap();
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+    // Point the built-in OpenAI provider at the mock server.
+    config.model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    config.user_instructions = Some("be consistent and helpful".to_string());
+    config.include_apply_patch_tool = true;
+    config.include_plan_tool = true;
+
+    let manager = ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    // Two consecutive turns, each awaited until its task completes.
+    for text in ["hello 1", "hello 2"] {
+        codex
+            .submit(Op::UserInput {
+                items: vec![InputItem::Text { text: text.into() }],
+            })
+            .await
+            .unwrap();
+        wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+    }
+
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(requests.len(), 2, "expected two POST requests");
+
+    let expected_instructions: &str = include_str!("../../prompt.md");
+    // our internal implementation is responsible for keeping tools in sync
+    // with the OpenAI schema, so we just verify the tool presence here
+    let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch"];
+
+    for request in &requests {
+        let body = request.body_json::<serde_json::Value>().unwrap();
+        assert_eq!(
+            body["instructions"],
+            serde_json::json!(expected_instructions),
+        );
+        assert_tool_names(&body, expected_tools_names);
+    }
+}
+
+/// Sends two user turns and checks the `input` of both requests: the first must be the user
+/// instructions message, the environment context message and the first user message; the
+/// second must be exactly the first request's input followed by the second user message.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn prefixes_context_and_instructions_once_and_consistently_across_requests() {
+    use pretty_assertions::assert_eq;
+
+    // Builds the JSON form of a plain user text message as it appears in a request's `input`.
+    let user_message = |text: &str| {
+        serde_json::json!({
+            "type": "message",
+            "id": serde_json::Value::Null,
+            "role": "user",
+            "content": [ { "type": "input_text", "text": text } ]
+        })
+    };
+
+    let server = MockServer::start().await;
+
+    // Every POST to /v1/responses gets the same completed SSE stream; exactly two are expected.
+    let completed_stream = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp"), "text/event-stream");
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(completed_stream)
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    let cwd = TempDir::new().unwrap();
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+    // Point the built-in OpenAI provider at the mock server.
+    config.model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    config.user_instructions = Some("be consistent and helpful".to_string());
+
+    let manager = ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    // Two consecutive turns, each awaited until its task completes.
+    for text in ["hello 1", "hello 2"] {
+        codex
+            .submit(Op::UserInput {
+                items: vec![InputItem::Text { text: text.into() }],
+            })
+            .await
+            .unwrap();
+        wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+    }
+
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(requests.len(), 2, "expected two POST requests");
+
+    // The shell line is only present when the default user shell reports a name.
+    let shell = default_user_shell().await;
+    let shell_line = shell
+        .name()
+        .map(|name| format!("  <shell>{name}</shell>\n"))
+        .unwrap_or_default();
+    let expected_env_text = format!(
+        r#"<environment_context>
+  <cwd>{}</cwd>
+  <approval_policy>on-request</approval_policy>
+  <sandbox_mode>read-only</sandbox_mode>
+  <network_access>restricted</network_access>
+{}</environment_context>"#,
+        cwd.path().to_string_lossy(),
+        shell_line,
+    );
+    let expected_ui_text =
+        "<user_instructions>\n\nbe consistent and helpful\n\n</user_instructions>";
+
+    let expected_first_input = serde_json::Value::Array(vec![
+        user_message(expected_ui_text),
+        user_message(&expected_env_text),
+        user_message("hello 1"),
+    ]);
+    let first_body = requests[0].body_json::<serde_json::Value>().unwrap();
+    assert_eq!(first_body["input"], expected_first_input);
+
+    // The second request must repeat the first input verbatim and append only the new message.
+    let second_body = requests[1].body_json::<serde_json::Value>().unwrap();
+    let mut expected_second_input = first_body["input"].as_array().unwrap().clone();
+    expected_second_input.push(user_message("hello 2"));
+    assert_eq!(
+        second_body["input"],
+        serde_json::Value::Array(expected_second_input)
+    );
+}
+
+/// Changes approval policy, sandbox policy, model, effort and summary with
+/// `Op::OverrideTurnContext` between two user turns, then checks that `prompt_cache_key` is
+/// unchanged and that the second request's input is the first request's input followed by a
+/// new environment context message and the second user message.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn overrides_turn_context_but_keeps_cached_prefix_and_key_constant() {
+    use pretty_assertions::assert_eq;
+
+    // Builds the JSON form of a plain user text message as it appears in a request's `input`.
+    let user_message = |text: &str| {
+        serde_json::json!({
+            "type": "message",
+            "id": serde_json::Value::Null,
+            "role": "user",
+            "content": [ { "type": "input_text", "text": text } ]
+        })
+    };
+    // Wraps `text` in the op that submits it as a user turn.
+    let user_input = |text: &str| Op::UserInput {
+        items: vec![InputItem::Text { text: text.into() }],
+    };
+
+    let server = MockServer::start().await;
+
+    // Every POST to /v1/responses gets the same completed SSE stream; exactly two are expected.
+    let completed_stream = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp"), "text/event-stream");
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(completed_stream)
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    let cwd = TempDir::new().unwrap();
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+    // Point the built-in OpenAI provider at the mock server.
+    config.model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    config.user_instructions = Some("be consistent and helpful".to_string());
+
+    let manager = ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    // First turn
+    codex.submit(user_input("hello 1")).await.unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Override everything except cwd before the second turn.
+    let writable = TempDir::new().unwrap();
+    let overrides = Op::OverrideTurnContext {
+        cwd: None,
+        approval_policy: Some(AskForApproval::Never),
+        sandbox_policy: Some(SandboxPolicy::WorkspaceWrite {
+            writable_roots: vec![writable.path().to_path_buf()],
+            network_access: true,
+            exclude_tmpdir_env_var: true,
+            exclude_slash_tmp: true,
+        }),
+        model: Some("o3".to_string()),
+        effort: Some(ReasoningEffort::High),
+        summary: Some(ReasoningSummary::Detailed),
+    };
+    codex.submit(overrides).await.unwrap();
+
+    // Second turn after overrides
+    codex.submit(user_input("hello 2")).await.unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Verify we issued exactly two requests, and the cached prefix stayed identical.
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(requests.len(), 2, "expected two POST requests");
+
+    let first_body = requests[0].body_json::<serde_json::Value>().unwrap();
+    let second_body = requests[1].body_json::<serde_json::Value>().unwrap();
+    // prompt_cache_key should remain constant across overrides
+    assert_eq!(
+        first_body["prompt_cache_key"], second_body["prompt_cache_key"],
+        "prompt_cache_key should not change across overrides"
+    );
+
+    // After overriding the turn context, the environment context should be emitted again
+    // reflecting the new approval policy and sandbox settings. Omit cwd because it did
+    // not change.
+    let expected_env_text_2 = r#"<environment_context>
+  <approval_policy>never</approval_policy>
+  <sandbox_mode>workspace-write</sandbox_mode>
+  <network_access>enabled</network_access>
+</environment_context>"#;
+
+    // The entire input of the first request must reappear unchanged as the prefix of the
+    // second request, ensuring cache hit potential; only the new messages are appended.
+    let mut expected_second_input = first_body["input"].as_array().unwrap().clone();
+    expected_second_input.push(user_message(expected_env_text_2));
+    expected_second_input.push(user_message("hello 2"));
+    assert_eq!(
+        second_body["input"],
+        serde_json::Value::Array(expected_second_input)
+    );
+}
+
+/// Sends a plain user turn followed by an `Op::UserTurn` that carries its own cwd, approval
+/// policy, sandbox policy, model, effort and summary, then checks that `prompt_cache_key` is
+/// unchanged and that the second request's input is the first request's input followed only
+/// by the second user message.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn per_turn_overrides_keep_cached_prefix_and_key_constant() {
+    use pretty_assertions::assert_eq;
+
+    let server = MockServer::start().await;
+
+    // Every POST to /v1/responses gets the same completed SSE stream; exactly two are expected.
+    let completed_stream = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp"), "text/event-stream");
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(completed_stream)
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    let cwd = TempDir::new().unwrap();
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.cwd = cwd.path().to_path_buf();
+    // Point the built-in OpenAI provider at the mock server.
+    config.model_provider = ModelProviderInfo {
+        base_url: Some(format!("{}/v1", server.uri())),
+        ..built_in_model_providers()["openai"].clone()
+    };
+    config.user_instructions = Some("be consistent and helpful".to_string());
+
+    let manager = ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = manager
+        .new_conversation(config)
+        .await
+        .expect("create new conversation")
+        .conversation;
+
+    // First turn
+    let first_turn = Op::UserInput {
+        items: vec![InputItem::Text {
+            text: "hello 1".into(),
+        }],
+    };
+    codex.submit(first_turn).await.unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Second turn using per-turn overrides via UserTurn
+    let new_cwd = TempDir::new().unwrap();
+    let writable = TempDir::new().unwrap();
+    let second_turn = Op::UserTurn {
+        items: vec![InputItem::Text {
+            text: "hello 2".into(),
+        }],
+        cwd: new_cwd.path().to_path_buf(),
+        approval_policy: AskForApproval::Never,
+        sandbox_policy: SandboxPolicy::WorkspaceWrite {
+            writable_roots: vec![writable.path().to_path_buf()],
+            network_access: true,
+            exclude_tmpdir_env_var: true,
+            exclude_slash_tmp: true,
+        },
+        model: "o3".to_string(),
+        effort: ReasoningEffort::High,
+        summary: ReasoningSummary::Detailed,
+    };
+    codex.submit(second_turn).await.unwrap();
+    wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
+
+    // Verify we issued exactly two requests, and the cached prefix stayed identical.
+    let requests = server.received_requests().await.unwrap();
+    assert_eq!(requests.len(), 2, "expected two POST requests");
+
+    let first_body = requests[0].body_json::<serde_json::Value>().unwrap();
+    let second_body = requests[1].body_json::<serde_json::Value>().unwrap();
+
+    // prompt_cache_key should remain constant across per-turn overrides
+    assert_eq!(
+        first_body["prompt_cache_key"], second_body["prompt_cache_key"],
+        "prompt_cache_key should not change across per-turn overrides"
+    );
+
+    // The entire input of the first request must reappear unchanged as the prefix of the
+    // second request, with only the second user message appended.
+    let mut expected_second_input = first_body["input"].as_array().unwrap().clone();
+    expected_second_input.push(serde_json::json!({
+        "type": "message",
+        "id": serde_json::Value::Null,
+        "role": "user",
+        "content": [ { "type": "input_text", "text": "hello 2" } ]
+    }));
+    assert_eq!(
+        second_body["input"],
+        serde_json::Value::Array(expected_second_input)
+    );
+}
--- a/codex-rs/core/tests/suite/seatbelt.rs
+++ b/codex-rs/core/tests/suite/seatbelt.rs
@@ -0,0 +1,201 @@
+#![cfg(target_os = "macos")]
+
+//! Tests for the macOS sandboxing that are specific to Seatbelt.
+//! Tests that apply to both Mac and Linux sandboxing should go in sandbox.rs.
+
+use std::collections::HashMap;
+use std::path::Path;
+use std::path::PathBuf;
+
+use codex_core::protocol::SandboxPolicy;
+use codex_core::seatbelt::spawn_command_under_seatbelt;
+use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
+use codex_core::spawn::StdioPolicy;
+use tempfile::TempDir;
+
+/// Paths that make up the on-disk layout exercised by the tests in this file.
+///
+/// Instances are built by `create_test_scenario`, which creates `repo_root`
+/// and its `.git` directory; the three `file_*` paths are only computed
+/// there, not created.
+struct TestScenario {
+    /// Directory that contains `repo_root` (the temp dir passed to
+    /// `create_test_scenario`).
+    repo_parent: PathBuf,
+    /// `outside.txt` directly under `repo_parent`, i.e. outside `repo_root`.
+    file_outside_repo: PathBuf,
+    /// The `repo` directory under `repo_parent`; it contains a `.git` folder.
+    repo_root: PathBuf,
+    /// `repo_file.txt` directly under `repo_root`.
+    file_in_repo_root: PathBuf,
+    /// `dot_git_file.txt` inside the `.git` folder of `repo_root`.
+    file_in_dot_git_dir: PathBuf,
+}
+
+/// Expected outcome of trying to create each `TestScenario` file under a
+/// given sandbox policy; consumed by `TestScenario::run_test`.
+struct TestExpectations {
+    /// Whether creating `file_outside_repo` is expected to succeed.
+    file_outside_repo_is_writable: bool,
+    /// Whether creating `file_in_repo_root` is expected to succeed.
+    file_in_repo_root_is_writable: bool,
+    /// Whether creating `file_in_dot_git_dir` is expected to succeed.
+    file_in_dot_git_dir_is_writable: bool,
+}
+
+impl TestScenario {
+    /// Attempts to `touch` each scenario file under `policy` and asserts that
+    /// both the command's success and the file's resulting existence match
+    /// `expectations`.
+    ///
+    /// Returns early without asserting anything when `CODEX_SANDBOX_ENV_VAR`
+    /// is set to `seatbelt`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if any observed outcome differs from the expected one.
+    async fn run_test(&self, policy: &SandboxPolicy, expectations: TestExpectations) {
+        // Compare against a borrowed &str so no String is allocated just for
+        // the equality check.
+        if std::env::var(CODEX_SANDBOX_ENV_VAR).as_deref() == Ok("seatbelt") {
+            eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
+            return;
+        }
+
+        // Checked in this order: outside the repo, repo root, then .git.
+        let checks = [
+            (
+                &self.file_outside_repo,
+                expectations.file_outside_repo_is_writable,
+            ),
+            (
+                &self.file_in_repo_root,
+                expectations.file_in_repo_root_is_writable,
+            ),
+            (
+                &self.file_in_dot_git_dir,
+                expectations.file_in_dot_git_dir_is_writable,
+            ),
+        ];
+
+        for (path, should_be_writable) in checks {
+            // The messages name the path because all three checks now share
+            // the same source lines.
+            assert_eq!(
+                touch(path, policy).await,
+                should_be_writable,
+                "unexpected `touch` result for {path:?}"
+            );
+            assert_eq!(
+                path.exists(),
+                should_be_writable,
+                "unexpected existence of {path:?} after `touch`"
+            );
+        }
+    }
+}
+
+/// A workspace root that is not itself a Git repo root can only be used if
+/// the user passes `--skip-git-repo-check` or goes through some interstitial
+/// acknowledging the risk that Git cannot be used to back up their work
+/// before the agent begins.
+///
+/// Because the user has agreed to this risk, we do not try to find every
+/// .git folder in the workspace and block it (though we could change our
+/// position on this in the future).
+#[tokio::test]
+async fn if_parent_of_repo_is_writable_then_dot_git_folder_is_writable() {
+    let temp_dir = TempDir::new().expect("should be able to create temp dir");
+    let scenario = create_test_scenario(&temp_dir);
+
+    // The writable root is the repo's parent, so all three files are expected
+    // to be writable, including the one inside .git.
+    let everything_writable = TestExpectations {
+        file_outside_repo_is_writable: true,
+        file_in_repo_root_is_writable: true,
+        file_in_dot_git_dir_is_writable: true,
+    };
+    let writable_roots = vec![scenario.repo_parent.clone()];
+    let policy = SandboxPolicy::WorkspaceWrite {
+        writable_roots,
+        network_access: false,
+        exclude_tmpdir_env_var: true,
+        exclude_slash_tmp: true,
+    };
+
+    scenario.run_test(&policy, everything_writable).await;
+}
+
+/// With a `WorkspaceWrite` policy whose writable root is the root of a Git
+/// repository (as evidenced by the presence of a .git folder), the .git
+/// folder should be read-only.
+#[tokio::test]
+async fn if_git_repo_is_writable_root_then_dot_git_folder_is_read_only() {
+    let temp_dir = TempDir::new().expect("should be able to create temp dir");
+    let scenario = create_test_scenario(&temp_dir);
+
+    // Only the file directly in the repo root is expected to be writable.
+    let only_repo_root_writable = TestExpectations {
+        file_outside_repo_is_writable: false,
+        file_in_repo_root_is_writable: true,
+        file_in_dot_git_dir_is_writable: false,
+    };
+    let writable_roots = vec![scenario.repo_root.clone()];
+    let policy = SandboxPolicy::WorkspaceWrite {
+        writable_roots,
+        network_access: false,
+        exclude_tmpdir_env_var: true,
+        exclude_slash_tmp: true,
+    };
+
+    scenario.run_test(&policy, only_repo_root_writable).await;
+}
+
+/// With `DangerFullAccess`, every write should be permitted, including
+/// writes inside the .git folder.
+#[tokio::test]
+async fn danger_full_access_allows_all_writes() {
+    let temp_dir = TempDir::new().expect("should be able to create temp dir");
+    let scenario = create_test_scenario(&temp_dir);
+
+    let everything_writable = TestExpectations {
+        file_outside_repo_is_writable: true,
+        file_in_repo_root_is_writable: true,
+        file_in_dot_git_dir_is_writable: true,
+    };
+
+    scenario
+        .run_test(&SandboxPolicy::DangerFullAccess, everything_writable)
+        .await;
+}
+
+/// With `ReadOnly`, no write should be permitted anywhere on disk.
+#[tokio::test]
+async fn read_only_forbids_all_writes() {
+    let temp_dir = TempDir::new().expect("should be able to create temp dir");
+    let scenario = create_test_scenario(&temp_dir);
+
+    let nothing_writable = TestExpectations {
+        file_outside_repo_is_writable: false,
+        file_in_repo_root_is_writable: false,
+        file_in_dot_git_dir_is_writable: false,
+    };
+
+    scenario
+        .run_test(&SandboxPolicy::ReadOnly, nothing_writable)
+        .await;
+}
+
+/// Creates `repo/` and `repo/.git/` inside `tmp` and returns the paths the
+/// tests probe. The three file paths are computed but the files themselves
+/// are not created.
+#[expect(clippy::expect_used)]
+fn create_test_scenario(tmp: &TempDir) -> TestScenario {
+    let parent_dir = tmp.path().to_path_buf();
+    let repo_dir = parent_dir.join("repo");
+    let git_dir = repo_dir.join(".git");
+
+    // The repo root must exist before its .git child can be created.
+    std::fs::create_dir(&repo_dir).expect("should be able to create repo root");
+    std::fs::create_dir(&git_dir).expect("should be able to create .git dir");
+
+    let file_outside_repo = parent_dir.join("outside.txt");
+    let file_in_repo_root = repo_dir.join("repo_file.txt");
+    let file_in_dot_git_dir = git_dir.join("dot_git_file.txt");
+
+    TestScenario {
+        repo_parent: parent_dir,
+        file_outside_repo,
+        repo_root: repo_dir,
+        file_in_repo_root,
+        file_in_dot_git_dir,
+    }
+}
+
+/// Runs `/usr/bin/touch <path>` through `spawn_command_under_seatbelt` with
+/// the given `policy` and reports whether the child exited successfully.
+///
+/// Note that `path` must be absolute.
+#[expect(clippy::expect_used)]
+async fn touch(path: &Path, policy: &SandboxPolicy) -> bool {
+    assert!(path.is_absolute(), "Path must be absolute: {path:?}");
+
+    let command = vec![
+        "/usr/bin/touch".to_string(),
+        path.to_string_lossy().into_owned(),
+    ];
+    let cwd = std::env::current_dir().expect("should be able to get current dir");
+    // The child is spawned with an empty environment map.
+    let env = HashMap::new();
+
+    let spawned = spawn_command_under_seatbelt(
+        command,
+        policy,
+        cwd,
+        StdioPolicy::RedirectForShellTool,
+        env,
+    )
+    .await;
+    let mut child = spawned.expect("should be able to spawn command under seatbelt");
+
+    let exit_status = child.wait().await;
+    exit_status
+        .expect("should be able to wait for child process")
+        .success()
+}
--- a/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs
+++ b/codex-rs/core/tests/suite/stream_error_allows_next_turn.rs
@@ -0,0 +1,141 @@
+use std::time::Duration;
+
+use codex_core::ConversationManager;
+use codex_core::ModelProviderInfo;
+use codex_core::WireApi;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_login::CodexAuth;
+use core_test_support::load_default_config_for_test;
+use core_test_support::load_sse_fixture_with_id;
+use core_test_support::wait_for_event_with_timeout;
+use tempfile::TempDir;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::body_string_contains;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+/// Returns the SSE body produced by `load_sse_fixture_with_id` for
+/// `tests/fixtures/completed_template.json`, passing `id` through.
+// NOTE(review): presumably the helper substitutes `id` into the template as
+// the response id — confirm in `core_test_support`.
+fn sse_completed(id: &str) -> String {
+    load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
+}
+
+/// Checks that a conversation accepts a second turn after the first one
+/// fails: the first submission is answered by the mock server with HTTP 500,
+/// the test waits for an `Error` event and then a `TaskComplete` event, and a
+/// follow-up submission is then answered with a completed SSE stream.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn continue_after_stream_error() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    let server = MockServer::start().await;
+
+    // Failure response for the first turn: HTTP 500 with a JSON error body.
+    let fail = ResponseTemplate::new(500)
+        .insert_header("content-type", "application/json")
+        .set_body_string(
+            serde_json::json!({
+                "error": {"type": "bad_request", "message": "synthetic client error"}
+            })
+            .to_string(),
+        );
+
+    // The provider below sets request_max_retries = 1, and this mock serves
+    // the failure for at most two requests whose body contains
+    // "first message".
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .and(body_string_contains("first message"))
+        .respond_with(fail)
+        .up_to_n_times(2)
+        .mount(&server)
+        .await;
+
+    // Success response for the second turn: a completed SSE stream.
+    let ok = ResponseTemplate::new(200)
+        .insert_header("content-type", "text/event-stream")
+        .set_body_raw(sse_completed("resp_ok2"), "text/event-stream");
+
+    // Exactly one request whose body contains "follow up" is expected.
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .and(body_string_contains("follow up"))
+        .respond_with(ok)
+        .expect(1)
+        .mount(&server)
+        .await;
+
+    // Configure a provider that uses the Responses API and points at our mock
+    // server. Use an existing env var (PATH) to satisfy the auth plumbing
+    // without requiring a real secret.
+    let provider = ModelProviderInfo {
+        name: "mock-openai".into(),
+        base_url: Some(format!("{}/v1", server.uri())),
+        env_key: Some("PATH".into()),
+        env_key_instructions: None,
+        wire_api: WireApi::Responses,
+        query_params: None,
+        http_headers: None,
+        env_http_headers: None,
+        request_max_retries: Some(1),
+        stream_max_retries: Some(1),
+        stream_idle_timeout_ms: Some(2_000),
+        requires_openai_auth: false,
+    };
+
+    let home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&home);
+    config.base_instructions = Some("You are a helpful assistant".to_string());
+    config.model_provider = provider;
+
+    let conversation_manager =
+        ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .unwrap()
+        .conversation;
+
+    // 1) First turn: send the prompt that the mock server answers with the
+    // failure response.
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "first message".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    // Expect an Error followed by TaskComplete so the session is released.
+    wait_for_event_with_timeout(
+        &codex,
+        |ev| matches!(ev, EventMsg::Error(_)),
+        Duration::from_secs(5),
+    )
+    .await;
+
+    wait_for_event_with_timeout(
+        &codex,
+        |ev| matches!(ev, EventMsg::TaskComplete(_)),
+        Duration::from_secs(5),
+    )
+    .await;
+
+    // 2) Second turn: now send another prompt that should succeed using the
+    // mock server SSE stream. If the agent failed to clear the running task on
+    // error above, this submission would be rejected/queued indefinitely.
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "follow up".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    wait_for_event_with_timeout(
+        &codex,
+        |ev| matches!(ev, EventMsg::TaskComplete(_)),
+        Duration::from_secs(5),
+    )
+    .await;
+}
--- a/codex-rs/core/tests/suite/stream_no_completed.rs
+++ b/codex-rs/core/tests/suite/stream_no_completed.rs
@@ -0,0 +1,123 @@
+//! Verifies that the agent retries when the SSE stream terminates before
+//! delivering a `response.completed` event.
+
+use std::time::Duration;
+
+use codex_core::ConversationManager;
+use codex_core::ModelProviderInfo;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_login::CodexAuth;
+use core_test_support::load_default_config_for_test;
+use core_test_support::load_sse_fixture;
+use core_test_support::load_sse_fixture_with_id;
+use tempfile::TempDir;
+use tokio::time::timeout;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::Request;
+use wiremock::Respond;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+/// Returns the SSE body loaded by `load_sse_fixture` from
+/// `tests/fixtures/incomplete_sse.json`; served as the first response in
+/// `retries_on_early_close`.
+fn sse_incomplete() -> String {
+    load_sse_fixture("tests/fixtures/incomplete_sse.json")
+}
+
+/// Returns the SSE body produced by `load_sse_fixture_with_id` for
+/// `tests/fixtures/completed_template.json`, passing `id` through.
+// NOTE(review): presumably the helper substitutes `id` into the template as
+// the response id — confirm in `core_test_support`.
+fn sse_completed(id: &str) -> String {
+    load_sse_fixture_with_id("tests/fixtures/completed_template.json", id)
+}
+
+/// Verifies that a turn still reaches `TaskComplete` when the first SSE
+/// response is the incomplete fixture: the mock server serves the incomplete
+/// stream on the first request and the completed stream on every later one,
+/// and the mock is mounted with an expectation of exactly two requests.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn retries_on_early_close() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    use std::sync::atomic::AtomicUsize;
+    use std::sync::atomic::Ordering;
+
+    let server = MockServer::start().await;
+
+    /// Responds with the incomplete stream on the first call and with the
+    /// completed stream on every call after that.
+    struct SeqResponder {
+        // Per-instance counter instead of a function-local `static`, so the
+        // sequencing does not rely on process-global state shared by the whole
+        // test binary.
+        calls: AtomicUsize,
+    }
+    impl Respond for SeqResponder {
+        fn respond(&self, _: &Request) -> ResponseTemplate {
+            let n = self.calls.fetch_add(1, Ordering::SeqCst);
+            let body = if n == 0 {
+                sse_incomplete()
+            } else {
+                sse_completed("resp_ok")
+            };
+            ResponseTemplate::new(200)
+                .insert_header("content-type", "text/event-stream")
+                .set_body_raw(body, "text/event-stream")
+        }
+    }
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(SeqResponder {
+            calls: AtomicUsize::new(0),
+        })
+        .expect(2)
+        .mount(&server)
+        .await;
+
+    // Configure retry behavior explicitly to avoid mutating process-wide
+    // environment variables.
+
+    let model_provider = ModelProviderInfo {
+        name: "openai".into(),
+        base_url: Some(format!("{}/v1", server.uri())),
+        // Environment variable that should exist in the test environment.
+        // ModelClient will return an error if the environment variable for the
+        // provider is not set.
+        env_key: Some("PATH".into()),
+        env_key_instructions: None,
+        wire_api: codex_core::WireApi::Responses,
+        query_params: None,
+        http_headers: None,
+        env_http_headers: None,
+        // Exercise the stream retry path: the first attempt yields an
+        // incomplete stream, so allow one stream retry
+        // (stream_max_retries = 1) while request_max_retries stays at 0.
+        request_max_retries: Some(0),
+        stream_max_retries: Some(1),
+        stream_idle_timeout_ms: Some(2000),
+        requires_openai_auth: false,
+    };
+
+    let codex_home = TempDir::new().unwrap();
+    let mut config = load_default_config_for_test(&codex_home);
+    config.model_provider = model_provider;
+    let conversation_manager =
+        ConversationManager::with_auth(CodexAuth::from_api_key("Test API Key"));
+    let codex = conversation_manager
+        .new_conversation(config)
+        .await
+        .unwrap()
+        .conversation;
+
+    codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: "hello".into(),
+            }],
+        })
+        .await
+        .unwrap();
+
+    // Wait until TaskComplete (should succeed after retry). Each event must
+    // arrive within 10 seconds or the test panics on the timeout.
+    loop {
+        let ev = timeout(Duration::from_secs(10), codex.next_event())
+            .await
+            .unwrap()
+            .unwrap();
+        if matches!(ev.msg, EventMsg::TaskComplete(_)) {
+            break;
+        }
+    }
+}