[exec] Clean up apply-patch tests (#2648)

## Summary These tests were getting a bit unwieldy, and they're starting to become load-bearing. Let's clean them up, and get them working solidly so we can easily expand this harness with new tests. ## Test Plan - [x] Tests continue to pass
2025-08-25 15:08:01 -07:00
parent 568d6f819f
commit 7f7d1e30f3
9 changed files with 237 additions and 274 deletions
--- a/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
+++ b/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt
@@ -0,0 +1,4 @@
+class BaseClass:
+  def method():
+
+    return True
--- a/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json
@@ -0,0 +1,25 @@
+[
+  {
+    "type": "response.output_item.done",
+    "item": {
+      "type": "custom_tool_call",
+      "name": "apply_patch",
+      "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch",
+      "call_id": "__ID__"
+    }
+  },
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]
--- a/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json
@@ -0,0 +1,25 @@
+[
+  {
+    "type": "response.output_item.done",
+    "item": {
+      "type": "custom_tool_call",
+      "name": "apply_patch",
+      "input": "*** Begin Patch\n*** Add File: app.py\n+class BaseClass:\n+  def method():\n+    return False\n*** End Patch",
+      "call_id": "__ID__"
+    }
+  },
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]
--- a/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json
@@ -0,0 +1,25 @@
+[
+  {
+    "type": "response.output_item.done",
+    "item": {
+      "type": "custom_tool_call",
+      "name": "apply_patch",
+      "input": "*** Begin Patch\n*** Update File: app.py\n@@  def method():\n-    return False\n+\n+    return True\n*** End Patch",
+      "call_id": "__ID__"
+    }
+  },
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]
--- a/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json
+++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json
@@ -0,0 +1,25 @@
+[
+  {
+    "type": "response.output_item.done",
+    "item": {
+      "type": "function_call",
+      "name": "apply_patch",
+      "arguments": "{\n  \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
+      "call_id": "__ID__"
+    }
+  },
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]
--- a/codex-rs/exec/tests/fixtures/sse_response_completed.json
+++ b/codex-rs/exec/tests/fixtures/sse_response_completed.json
@@ -0,0 +1,16 @@
+[
+  {
+    "type": "response.completed",
+    "response": {
+      "id": "__ID__",
+      "usage": {
+        "input_tokens": 0,
+        "input_tokens_details": null,
+        "output_tokens": 0,
+        "output_tokens_details": null,
+        "total_tokens": 0
+      },
+      "output": []
+    }
+  }
+]
--- a/codex-rs/exec/tests/suite/apply_patch.rs
+++ b/codex-rs/exec/tests/suite/apply_patch.rs
@@ -41,148 +41,31 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
 }

 #[cfg(not(target_os = "windows"))]
-#[tokio::test]
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_tool() -> anyhow::Result<()> {
-    use core_test_support::load_sse_fixture_with_id_from_str;
-    use tempfile::TempDir;
-    use wiremock::Mock;
-    use wiremock::MockServer;
-    use wiremock::ResponseTemplate;
-    use wiremock::matchers::method;
-    use wiremock::matchers::path;
+    use crate::suite::common::run_e2e_exec_test;
+    use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;

-    const SSE_TOOL_CALL_ADD: &str = r#"[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "function_call",
-      "name": "apply_patch",
-      "arguments": "{\n  \"input\": \"*** Begin Patch\\n*** Add File: test.md\\n+Hello world\\n*** End Patch\"\n}",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    const SSE_TOOL_CALL_UPDATE: &str = r#"[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "function_call",
-      "name": "apply_patch",
-      "arguments": "{\n  \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    const SSE_TOOL_CALL_COMPLETED: &str = r#"[
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    // Start a mock model server
-    let server = MockServer::start().await;
-
-    // First response: model calls apply_patch to create test.md
-    let first = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
-            "text/event-stream",
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
+        return Ok(());
+    }

-    Mock::given(method("POST"))
-        // .and(path("/v1/responses"))
-        .respond_with(first)
-        .up_to_n_times(1)
-        .mount(&server)
-        .await;
+    let tmp_cwd = tempdir().expect("failed to create temp dir");
+    let tmp_path = tmp_cwd.path().to_path_buf();
+    run_e2e_exec_test(
+        tmp_cwd.path(),
+        vec![
+            include_str!("../fixtures/sse_apply_patch_add.json").to_string(),
+            include_str!("../fixtures/sse_apply_patch_update.json").to_string(),
+            include_str!("../fixtures/sse_response_completed.json").to_string(),
+        ],
+    )
+    .await;

-    // Second response: model calls apply_patch to update test.md
-    let second = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
-            "text/event-stream",
-        );
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(second)
-        .up_to_n_times(1)
-        .mount(&server)
-        .await;
-
-    let final_completed = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
-            "text/event-stream",
-        );
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(final_completed)
-        .expect(1)
-        .mount(&server)
-        .await;
-
-    let tmp_cwd = TempDir::new().unwrap();
-    Command::cargo_bin("codex-exec")
-        .context("should find binary for codex-exec")?
-        .current_dir(tmp_cwd.path())
-        .env("CODEX_HOME", tmp_cwd.path())
-        .env("OPENAI_API_KEY", "dummy")
-        .env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
-        .arg("--skip-git-repo-check")
-        .arg("-s")
-        .arg("workspace-write")
-        .arg("foo")
-        .assert()
-        .success();
-
-    // Verify final file contents
-    let final_path = tmp_cwd.path().join("test.md");
+    let final_path = tmp_path.join("test.md");
    let contents = std::fs::read_to_string(&final_path)
        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
    assert_eq!(contents, "Final text\n");
@@ -190,150 +73,36 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
 }

 #[cfg(not(target_os = "windows"))]
-#[tokio::test]
+#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
-    use core_test_support::load_sse_fixture_with_id_from_str;
-    use tempfile::TempDir;
-    use wiremock::Mock;
-    use wiremock::MockServer;
-    use wiremock::ResponseTemplate;
-    use wiremock::matchers::method;
-    use wiremock::matchers::path;
+    use crate::suite::common::run_e2e_exec_test;
+    use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;

-    const SSE_TOOL_CALL_ADD: &str = r#"[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "custom_tool_call",
-      "name": "apply_patch",
-      "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    const SSE_TOOL_CALL_UPDATE: &str = r#"[
-  {
-    "type": "response.output_item.done",
-    "item": {
-      "type": "custom_tool_call",
-      "name": "apply_patch",
-      "input": "*** Begin Patch\n*** Update File: test.md\n@@\n-Hello world\n+Final text\n*** End Patch",
-      "call_id": "__ID__"
-    }
-  },
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    const SSE_TOOL_CALL_COMPLETED: &str = r#"[
-  {
-    "type": "response.completed",
-    "response": {
-      "id": "__ID__",
-      "usage": {
-        "input_tokens": 0,
-        "input_tokens_details": null,
-        "output_tokens": 0,
-        "output_tokens_details": null,
-        "total_tokens": 0
-      },
-      "output": []
-    }
-  }
-]"#;
-
-    // Start a mock model server
-    let server = MockServer::start().await;
-
-    // First response: model calls apply_patch to create test.md
-    let first = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
-            "text/event-stream",
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
+        return Ok(());
+    }

-    Mock::given(method("POST"))
-        // .and(path("/v1/responses"))
-        .respond_with(first)
-        .up_to_n_times(1)
-        .mount(&server)
-        .await;
-
-    // Second response: model calls apply_patch to update test.md
-    let second = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
-            "text/event-stream",
-        );
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(second)
-        .up_to_n_times(1)
-        .mount(&server)
-        .await;
-
-    let final_completed = ResponseTemplate::new(200)
-        .insert_header("content-type", "text/event-stream")
-        .set_body_raw(
-            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
-            "text/event-stream",
-        );
-
-    Mock::given(method("POST"))
-        // .and(path("/v1/responses"))
-        .respond_with(final_completed)
-        .expect(1)
-        .mount(&server)
-        .await;
-
-    let tmp_cwd = TempDir::new().unwrap();
-    Command::cargo_bin("codex-exec")
-        .context("should find binary for codex-exec")?
-        .current_dir(tmp_cwd.path())
-        .env("CODEX_HOME", tmp_cwd.path())
-        .env("OPENAI_API_KEY", "dummy")
-        .env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
-        .arg("--skip-git-repo-check")
-        .arg("-s")
-        .arg("workspace-write")
-        .arg("foo")
-        .assert()
-        .success();
+    let tmp_cwd = tempdir().expect("failed to create temp dir");
+    run_e2e_exec_test(
+        tmp_cwd.path(),
+        vec![
+            include_str!("../fixtures/sse_apply_patch_freeform_add.json").to_string(),
+            include_str!("../fixtures/sse_apply_patch_freeform_update.json").to_string(),
+            include_str!("../fixtures/sse_response_completed.json").to_string(),
+        ],
+    )
+    .await;

    // Verify final file contents
-    let final_path = tmp_cwd.path().join("test.md");
+    let final_path = tmp_cwd.path().join("app.py");
    let contents = std::fs::read_to_string(&final_path)
        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
-    assert_eq!(contents, "Final text\n");
+    assert_eq!(
+        contents,
+        include_str!("../fixtures/apply_patch_freeform_final.txt")
+    );
    Ok(())
 }
--- a/codex-rs/exec/tests/suite/common.rs
+++ b/codex-rs/exec/tests/suite/common.rs
@@ -0,0 +1,73 @@
+// this file is only used for e2e tests which are currently disabled on windows
+#![cfg(not(target_os = "windows"))]
+#![allow(clippy::expect_used)]
+
+use anyhow::Context;
+use assert_cmd::prelude::*;
+use core_test_support::load_sse_fixture_with_id_from_str;
+use std::path::Path;
+use std::process::Command;
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+use wiremock::Respond;
+
+struct SeqResponder {
+    num_calls: AtomicUsize,
+    responses: Vec<String>,
+}
+
+impl Respond for SeqResponder {
+    fn respond(&self, _: &wiremock::Request) -> wiremock::ResponseTemplate {
+        let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
+        match self.responses.get(call_num) {
+            Some(body) => wiremock::ResponseTemplate::new(200)
+                .insert_header("content-type", "text/event-stream")
+                .set_body_raw(
+                    load_sse_fixture_with_id_from_str(body, &format!("request_{}", call_num)),
+                    "text/event-stream",
+                ),
+            None => panic!("no response for {call_num}"),
+        }
+    }
+}
+
+/// Helper function to run an E2E test of a codex-exec call. Starts a wiremock
+/// server, and returns the response_streams in order for each api call. Runs
+/// the codex-exec command with the wiremock server as the model server.
+pub(crate) async fn run_e2e_exec_test(cwd: &Path, response_streams: Vec<String>) {
+    let server = MockServer::start().await;
+
+    let num_calls = response_streams.len();
+    let seq_responder = SeqResponder {
+        num_calls: AtomicUsize::new(0),
+        responses: response_streams,
+    };
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(seq_responder)
+        .expect(num_calls as u64)
+        .mount(&server)
+        .await;
+
+    let cwd = cwd.to_path_buf();
+    let uri = server.uri();
+    Command::cargo_bin("codex-exec")
+        .context("should find binary for codex-exec")
+        .expect("should find binary for codex-exec")
+        .current_dir(cwd.clone())
+        .env("CODEX_HOME", cwd.clone())
+        .env("OPENAI_API_KEY", "dummy")
+        .env("OPENAI_BASE_URL", format!("{}/v1", uri))
+        .arg("--skip-git-repo-check")
+        .arg("-s")
+        .arg("danger-full-access")
+        .arg("foo")
+        .assert()
+        .success();
+}
--- a/codex-rs/exec/tests/suite/mod.rs
+++ b/codex-rs/exec/tests/suite/mod.rs
@@ -1,3 +1,4 @@
 // Aggregates all former standalone integration tests as modules.
 mod apply_patch;
+mod common;
 mod sandbox;