Add codex exec testing helpers (#4254)

Add a shortcut to create working directories and run codex exec with fake server.
2025-09-25 17:12:45 -07:00
parent 9f2ab97fbc
commit 8e3a048fec
9 changed files with 116 additions and 90 deletions
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -1107,6 +1107,7 @@ name = "core_test_support"
 version = "0.0.0"
 dependencies = [
 "anyhow",
+ "assert_cmd",
 "codex-core",
 "serde_json",
 "tempfile",
--- a/codex-rs/core/tests/common/Cargo.toml
+++ b/codex-rs/core/tests/common/Cargo.toml
@@ -8,6 +8,7 @@ path = "lib.rs"

 [dependencies]
 anyhow = { workspace = true }
+assert_cmd = { workspace = true }
 codex-core = { workspace = true }
 serde_json = { workspace = true }
 tempfile = { workspace = true }
--- a/codex-rs/core/tests/common/lib.rs
+++ b/codex-rs/core/tests/common/lib.rs
@@ -9,6 +9,7 @@ use codex_core::config::ConfigToml;

 pub mod responses;
 pub mod test_codex;
+pub mod test_codex_exec;

 /// Returns a default `Config` whose on-disk state is confined to the provided
 /// temporary directory. Using a per-test directory keeps tests hermetic and
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -2,6 +2,7 @@ use serde_json::Value;
 use wiremock::BodyPrintLimit;
 use wiremock::Mock;
 use wiremock::MockServer;
+use wiremock::Respond;
 use wiremock::ResponseTemplate;
 use wiremock::matchers::method;
 use wiremock::matchers::path;
@@ -131,3 +132,41 @@ pub async fn start_mock_server() -> MockServer {
        .start()
        .await
 }
+
+/// Mounts a sequence of SSE response bodies and serves them in order for each
+/// POST to `/v1/responses`. Panics if more requests are received than bodies
+/// provided. Also asserts the exact number of expected calls.
+pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) {
+    use std::sync::atomic::AtomicUsize;
+    use std::sync::atomic::Ordering;
+
+    struct SeqResponder {
+        num_calls: AtomicUsize,
+        responses: Vec<String>,
+    }
+
+    impl Respond for SeqResponder {
+        fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
+            let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
+            match self.responses.get(call_num) {
+                Some(body) => ResponseTemplate::new(200)
+                    .insert_header("content-type", "text/event-stream")
+                    .set_body_string(body.clone()),
+                None => panic!("no response for {call_num}"),
+            }
+        }
+    }
+
+    let num_calls = bodies.len();
+    let responder = SeqResponder {
+        num_calls: AtomicUsize::new(0),
+        responses: bodies,
+    };
+
+    Mock::given(method("POST"))
+        .and(path("/v1/responses"))
+        .respond_with(responder)
+        .expect(num_calls as u64)
+        .mount(server)
+        .await;
+}
--- a/codex-rs/core/tests/common/test_codex_exec.rs
+++ b/codex-rs/core/tests/common/test_codex_exec.rs
@@ -0,0 +1,40 @@
+#![allow(clippy::expect_used)]
+use std::path::Path;
+use tempfile::TempDir;
+use wiremock::MockServer;
+
+pub struct TestCodexExecBuilder {
+    home: TempDir,
+    cwd: TempDir,
+}
+
+impl TestCodexExecBuilder {
+    pub fn cmd(&self) -> assert_cmd::Command {
+        let mut cmd = assert_cmd::Command::cargo_bin("codex-exec")
+            .expect("should find binary for codex-exec");
+        cmd.current_dir(self.cwd.path())
+            .env("CODEX_HOME", self.home.path())
+            .env("OPENAI_API_KEY", "dummy");
+        cmd
+    }
+    pub fn cmd_with_server(&self, server: &MockServer) -> assert_cmd::Command {
+        let mut cmd = self.cmd();
+        let base = format!("{}/v1", server.uri());
+        cmd.env("OPENAI_BASE_URL", base);
+        cmd
+    }
+
+    pub fn cwd_path(&self) -> &Path {
+        self.cwd.path()
+    }
+    pub fn home_path(&self) -> &Path {
+        self.home.path()
+    }
+}
+
+pub fn test_codex_exec() -> TestCodexExecBuilder {
+    TestCodexExecBuilder {
+        home: TempDir::new().expect("create temp home"),
+        cwd: TempDir::new().expect("create temp cwd"),
+    }
+}
--- a/codex-rs/exec/tests/suite/apply_patch.rs
+++ b/codex-rs/exec/tests/suite/apply_patch.rs
@@ -6,7 +6,9 @@ use codex_core::CODEX_APPLY_PATCH_ARG1;
 use core_test_support::responses::ev_apply_patch_custom_tool_call;
 use core_test_support::responses::ev_apply_patch_function_call;
 use core_test_support::responses::ev_completed;
+use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::sse;
+use core_test_support::responses::start_mock_server;
 use std::fs;
 use std::process::Command;
 use tempfile::tempdir;
@@ -47,13 +49,13 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
 #[cfg(not(target_os = "windows"))]
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_tool() -> anyhow::Result<()> {
-    use crate::suite::common::run_e2e_exec_test;
    use core_test_support::skip_if_no_network;
+    use core_test_support::test_codex_exec::test_codex_exec;

    skip_if_no_network!(Ok(()));

-    let tmp_cwd = tempdir().expect("failed to create temp dir");
-    let tmp_path = tmp_cwd.path().to_path_buf();
+    let test = test_codex_exec();
+    let tmp_path = test.cwd_path().to_path_buf();
    let add_patch = r#"*** Begin Patch
 *** Add File: test.md
 +Hello world
@@ -75,7 +77,16 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
        ]),
        sse(vec![ev_completed("request_2")]),
    ];
-    run_e2e_exec_test(tmp_cwd.path(), response_streams).await;
+    let server = start_mock_server().await;
+    mount_sse_sequence(&server, response_streams).await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("-s")
+        .arg("danger-full-access")
+        .arg("foo")
+        .assert()
+        .success();

    let final_path = tmp_path.join("test.md");
    let contents = std::fs::read_to_string(&final_path)
@@ -87,12 +98,12 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
 #[cfg(not(target_os = "windows"))]
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
-    use crate::suite::common::run_e2e_exec_test;
    use core_test_support::skip_if_no_network;
+    use core_test_support::test_codex_exec::test_codex_exec;

    skip_if_no_network!(Ok(()));

-    let tmp_cwd = tempdir().expect("failed to create temp dir");
+    let test = test_codex_exec();
    let freeform_add_patch = r#"*** Begin Patch
 *** Add File: app.py
 +class BaseClass:
@@ -117,10 +128,19 @@ async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
        ]),
        sse(vec![ev_completed("request_2")]),
    ];
-    run_e2e_exec_test(tmp_cwd.path(), response_streams).await;
+    let server = start_mock_server().await;
+    mount_sse_sequence(&server, response_streams).await;
+
+    test.cmd_with_server(&server)
+        .arg("--skip-git-repo-check")
+        .arg("-s")
+        .arg("danger-full-access")
+        .arg("foo")
+        .assert()
+        .success();

    // Verify final file contents
-    let final_path = tmp_cwd.path().join("app.py");
+    let final_path = test.cwd_path().join("app.py");
    let contents = std::fs::read_to_string(&final_path)
        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
    assert_eq!(
--- a/codex-rs/exec/tests/suite/common.rs
+++ b/codex-rs/exec/tests/suite/common.rs
@@ -1,69 +0,0 @@
-// this file is only used for e2e tests which are currently disabled on windows
-#![cfg(not(target_os = "windows"))]
-#![allow(clippy::expect_used)]
-
-use anyhow::Context;
-use assert_cmd::prelude::*;
-use std::path::Path;
-use std::process::Command;
-use std::sync::atomic::AtomicUsize;
-use std::sync::atomic::Ordering;
-use wiremock::Mock;
-use wiremock::MockServer;
-use wiremock::matchers::method;
-use wiremock::matchers::path;
-
-use wiremock::Respond;
-
-struct SeqResponder {
-    num_calls: AtomicUsize,
-    responses: Vec<String>,
-}
-
-impl Respond for SeqResponder {
-    fn respond(&self, _: &wiremock::Request) -> wiremock::ResponseTemplate {
-        let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
-        match self.responses.get(call_num) {
-            Some(body) => wiremock::ResponseTemplate::new(200)
-                .insert_header("content-type", "text/event-stream")
-                .set_body_string(body.clone()),
-            None => panic!("no response for {call_num}"),
-        }
-    }
-}
-
-/// Helper function to run an E2E test of a codex-exec call. Starts a wiremock
-/// server, and returns the response_streams in order for each api call. Runs
-/// the codex-exec command with the wiremock server as the model server.
-pub(crate) async fn run_e2e_exec_test(cwd: &Path, response_streams: Vec<String>) {
-    let server = MockServer::start().await;
-
-    let num_calls = response_streams.len();
-    let seq_responder = SeqResponder {
-        num_calls: AtomicUsize::new(0),
-        responses: response_streams,
-    };
-
-    Mock::given(method("POST"))
-        .and(path("/v1/responses"))
-        .respond_with(seq_responder)
-        .expect(num_calls as u64)
-        .mount(&server)
-        .await;
-
-    let cwd = cwd.to_path_buf();
-    let uri = server.uri();
-    Command::cargo_bin("codex-exec")
-        .context("should find binary for codex-exec")
-        .expect("should find binary for codex-exec")
-        .current_dir(cwd.clone())
-        .env("CODEX_HOME", cwd)
-        .env("OPENAI_API_KEY", "dummy")
-        .env("OPENAI_BASE_URL", format!("{uri}/v1"))
-        .arg("--skip-git-repo-check")
-        .arg("-s")
-        .arg("danger-full-access")
-        .arg("foo")
-        .assert()
-        .success();
-}
--- a/codex-rs/exec/tests/suite/mod.rs
+++ b/codex-rs/exec/tests/suite/mod.rs
@@ -1,6 +1,5 @@
 // Aggregates all former standalone integration tests as modules.
 mod apply_patch;
-mod common;
 mod output_schema;
 mod resume;
 mod sandbox;
--- a/codex-rs/exec/tests/suite/output_schema.rs
+++ b/codex-rs/exec/tests/suite/output_schema.rs
@@ -1,17 +1,14 @@
 #![cfg(not(target_os = "windows"))]
 #![allow(clippy::expect_used, clippy::unwrap_used)]

-use assert_cmd::prelude::*;
 use core_test_support::responses;
+use core_test_support::test_codex_exec::test_codex_exec;
 use serde_json::Value;
-use std::process::Command;
-use tempfile::TempDir;
 use wiremock::matchers::any;

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
-    let home = TempDir::new()?;
-    let workspace = TempDir::new()?;
+    let test = test_codex_exec();

    let schema_contents = serde_json::json!({
        "type": "object",
@@ -21,7 +18,7 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
        "required": ["answer"],
        "additionalProperties": false
    });
-    let schema_path = workspace.path().join("schema.json");
+    let schema_path = test.cwd_path().join("schema.json");
    std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?;
    let expected_schema: Value = schema_contents;

@@ -36,14 +33,11 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
    ]);
    responses::mount_sse_once(&server, any(), body).await;

-    Command::cargo_bin("codex-exec")?
-        .current_dir(workspace.path())
-        .env("CODEX_HOME", home.path())
-        .env("OPENAI_API_KEY", "dummy")
-        .env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
+    test.cmd_with_server(&server)
        .arg("--skip-git-repo-check")
+        // keep using -C in the test to exercise the flag as well
        .arg("-C")
-        .arg(workspace.path())
+        .arg(test.cwd_path())
        .arg("--output-schema")
        .arg(&schema_path)
        .arg("-m")