diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 874a5ab3..8b71f139 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1107,6 +1107,7 @@ name = "core_test_support" version = "0.0.0" dependencies = [ "anyhow", + "assert_cmd", "codex-core", "serde_json", "tempfile", diff --git a/codex-rs/core/tests/common/Cargo.toml b/codex-rs/core/tests/common/Cargo.toml index f85aaa9e..ed8bc93a 100644 --- a/codex-rs/core/tests/common/Cargo.toml +++ b/codex-rs/core/tests/common/Cargo.toml @@ -8,6 +8,7 @@ path = "lib.rs" [dependencies] anyhow = { workspace = true } +assert_cmd = { workspace = true } codex-core = { workspace = true } serde_json = { workspace = true } tempfile = { workspace = true } diff --git a/codex-rs/core/tests/common/lib.rs b/codex-rs/core/tests/common/lib.rs index 8b272681..ce90f397 100644 --- a/codex-rs/core/tests/common/lib.rs +++ b/codex-rs/core/tests/common/lib.rs @@ -9,6 +9,7 @@ use codex_core::config::ConfigToml; pub mod responses; pub mod test_codex; +pub mod test_codex_exec; /// Returns a default `Config` whose on-disk state is confined to the provided /// temporary directory. Using a per-test directory keeps tests hermetic and diff --git a/codex-rs/core/tests/common/responses.rs b/codex-rs/core/tests/common/responses.rs index 2f55f17a..b1de5791 100644 --- a/codex-rs/core/tests/common/responses.rs +++ b/codex-rs/core/tests/common/responses.rs @@ -2,6 +2,7 @@ use serde_json::Value; use wiremock::BodyPrintLimit; use wiremock::Mock; use wiremock::MockServer; +use wiremock::Respond; use wiremock::ResponseTemplate; use wiremock::matchers::method; use wiremock::matchers::path; @@ -131,3 +132,41 @@ pub async fn start_mock_server() -> MockServer { .start() .await } + +/// Mounts a sequence of SSE response bodies and serves them in order for each +/// POST to `/v1/responses`. Panics if more requests are received than bodies +/// provided. Also asserts the exact number of expected calls. +pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec) { + use std::sync::atomic::AtomicUsize; + use std::sync::atomic::Ordering; + + struct SeqResponder { + num_calls: AtomicUsize, + responses: Vec, + } + + impl Respond for SeqResponder { + fn respond(&self, _: &wiremock::Request) -> ResponseTemplate { + let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst); + match self.responses.get(call_num) { + Some(body) => ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_string(body.clone()), + None => panic!("no response for {call_num}"), + } + } + } + + let num_calls = bodies.len(); + let responder = SeqResponder { + num_calls: AtomicUsize::new(0), + responses: bodies, + }; + + Mock::given(method("POST")) + .and(path("/v1/responses")) + .respond_with(responder) + .expect(num_calls as u64) + .mount(server) + .await; +} diff --git a/codex-rs/core/tests/common/test_codex_exec.rs b/codex-rs/core/tests/common/test_codex_exec.rs new file mode 100644 index 00000000..6a4ce32b --- /dev/null +++ b/codex-rs/core/tests/common/test_codex_exec.rs @@ -0,0 +1,40 @@ +#![allow(clippy::expect_used)] +use std::path::Path; +use tempfile::TempDir; +use wiremock::MockServer; + +pub struct TestCodexExecBuilder { + home: TempDir, + cwd: TempDir, +} + +impl TestCodexExecBuilder { + pub fn cmd(&self) -> assert_cmd::Command { + let mut cmd = assert_cmd::Command::cargo_bin("codex-exec") + .expect("should find binary for codex-exec"); + cmd.current_dir(self.cwd.path()) + .env("CODEX_HOME", self.home.path()) + .env("OPENAI_API_KEY", "dummy"); + cmd + } + pub fn cmd_with_server(&self, server: &MockServer) -> assert_cmd::Command { + let mut cmd = self.cmd(); + let base = format!("{}/v1", server.uri()); + cmd.env("OPENAI_BASE_URL", base); + cmd + } + + pub fn cwd_path(&self) -> &Path { + self.cwd.path() + } + pub fn home_path(&self) -> &Path { + self.home.path() + } +} + +pub fn test_codex_exec() -> TestCodexExecBuilder { + TestCodexExecBuilder { + home: TempDir::new().expect("create temp home"), + cwd: TempDir::new().expect("create temp cwd"), + } +} diff --git a/codex-rs/exec/tests/suite/apply_patch.rs b/codex-rs/exec/tests/suite/apply_patch.rs index da19710d..a32f83ba 100644 --- a/codex-rs/exec/tests/suite/apply_patch.rs +++ b/codex-rs/exec/tests/suite/apply_patch.rs @@ -6,7 +6,9 @@ use codex_core::CODEX_APPLY_PATCH_ARG1; use core_test_support::responses::ev_apply_patch_custom_tool_call; use core_test_support::responses::ev_apply_patch_function_call; use core_test_support::responses::ev_completed; +use core_test_support::responses::mount_sse_sequence; use core_test_support::responses::sse; +use core_test_support::responses::start_mock_server; use std::fs; use std::process::Command; use tempfile::tempdir; @@ -47,13 +49,13 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> { #[cfg(not(target_os = "windows"))] #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_apply_patch_tool() -> anyhow::Result<()> { - use crate::suite::common::run_e2e_exec_test; use core_test_support::skip_if_no_network; + use core_test_support::test_codex_exec::test_codex_exec; skip_if_no_network!(Ok(())); - let tmp_cwd = tempdir().expect("failed to create temp dir"); - let tmp_path = tmp_cwd.path().to_path_buf(); + let test = test_codex_exec(); + let tmp_path = test.cwd_path().to_path_buf(); let add_patch = r#"*** Begin Patch *** Add File: test.md +Hello world @@ -75,7 +77,16 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> { ]), sse(vec![ev_completed("request_2")]), ]; - run_e2e_exec_test(tmp_cwd.path(), response_streams).await; + let server = start_mock_server().await; + mount_sse_sequence(&server, response_streams).await; + + test.cmd_with_server(&server) + .arg("--skip-git-repo-check") + .arg("-s") + .arg("danger-full-access") + .arg("foo") + .assert() + .success(); let final_path = tmp_path.join("test.md"); let contents = std::fs::read_to_string(&final_path) @@ -87,12 +98,12 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> { #[cfg(not(target_os = "windows"))] #[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> { - use crate::suite::common::run_e2e_exec_test; use core_test_support::skip_if_no_network; + use core_test_support::test_codex_exec::test_codex_exec; skip_if_no_network!(Ok(())); - let tmp_cwd = tempdir().expect("failed to create temp dir"); + let test = test_codex_exec(); let freeform_add_patch = r#"*** Begin Patch *** Add File: app.py +class BaseClass: @@ -117,10 +128,19 @@ async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> { ]), sse(vec![ev_completed("request_2")]), ]; - run_e2e_exec_test(tmp_cwd.path(), response_streams).await; + let server = start_mock_server().await; + mount_sse_sequence(&server, response_streams).await; + + test.cmd_with_server(&server) + .arg("--skip-git-repo-check") + .arg("-s") + .arg("danger-full-access") + .arg("foo") + .assert() + .success(); // Verify final file contents - let final_path = tmp_cwd.path().join("app.py"); + let final_path = test.cwd_path().join("app.py"); let contents = std::fs::read_to_string(&final_path) .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display())); assert_eq!( diff --git a/codex-rs/exec/tests/suite/common.rs b/codex-rs/exec/tests/suite/common.rs deleted file mode 100644 index 19de9a3e..00000000 --- a/codex-rs/exec/tests/suite/common.rs +++ /dev/null @@ -1,69 +0,0 @@ -// this file is only used for e2e tests which are currently disabled on windows -#![cfg(not(target_os = "windows"))] -#![allow(clippy::expect_used)] - -use anyhow::Context; -use assert_cmd::prelude::*; -use std::path::Path; -use std::process::Command; -use std::sync::atomic::AtomicUsize; -use std::sync::atomic::Ordering; -use wiremock::Mock; -use wiremock::MockServer; -use wiremock::matchers::method; -use wiremock::matchers::path; - -use wiremock::Respond; - -struct SeqResponder { - num_calls: AtomicUsize, - responses: Vec, -} - -impl Respond for SeqResponder { - fn respond(&self, _: &wiremock::Request) -> wiremock::ResponseTemplate { - let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst); - match self.responses.get(call_num) { - Some(body) => wiremock::ResponseTemplate::new(200) - .insert_header("content-type", "text/event-stream") - .set_body_string(body.clone()), - None => panic!("no response for {call_num}"), - } - } -} - -/// Helper function to run an E2E test of a codex-exec call. Starts a wiremock -/// server, and returns the response_streams in order for each api call. Runs -/// the codex-exec command with the wiremock server as the model server. -pub(crate) async fn run_e2e_exec_test(cwd: &Path, response_streams: Vec) { - let server = MockServer::start().await; - - let num_calls = response_streams.len(); - let seq_responder = SeqResponder { - num_calls: AtomicUsize::new(0), - responses: response_streams, - }; - - Mock::given(method("POST")) - .and(path("/v1/responses")) - .respond_with(seq_responder) - .expect(num_calls as u64) - .mount(&server) - .await; - - let cwd = cwd.to_path_buf(); - let uri = server.uri(); - Command::cargo_bin("codex-exec") - .context("should find binary for codex-exec") - .expect("should find binary for codex-exec") - .current_dir(cwd.clone()) - .env("CODEX_HOME", cwd) - .env("OPENAI_API_KEY", "dummy") - .env("OPENAI_BASE_URL", format!("{uri}/v1")) - .arg("--skip-git-repo-check") - .arg("-s") - .arg("danger-full-access") - .arg("foo") - .assert() - .success(); -} diff --git a/codex-rs/exec/tests/suite/mod.rs b/codex-rs/exec/tests/suite/mod.rs index 758f39ed..79a3ae38 100644 --- a/codex-rs/exec/tests/suite/mod.rs +++ b/codex-rs/exec/tests/suite/mod.rs @@ -1,6 +1,5 @@ // Aggregates all former standalone integration tests as modules. mod apply_patch; -mod common; mod output_schema; mod resume; mod sandbox; diff --git a/codex-rs/exec/tests/suite/output_schema.rs b/codex-rs/exec/tests/suite/output_schema.rs index 954e1258..03a04d05 100644 --- a/codex-rs/exec/tests/suite/output_schema.rs +++ b/codex-rs/exec/tests/suite/output_schema.rs @@ -1,17 +1,14 @@ #![cfg(not(target_os = "windows"))] #![allow(clippy::expect_used, clippy::unwrap_used)] -use assert_cmd::prelude::*; use core_test_support::responses; +use core_test_support::test_codex_exec::test_codex_exec; use serde_json::Value; -use std::process::Command; -use tempfile::TempDir; use wiremock::matchers::any; #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> { - let home = TempDir::new()?; - let workspace = TempDir::new()?; + let test = test_codex_exec(); let schema_contents = serde_json::json!({ "type": "object", @@ -21,7 +18,7 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> { "required": ["answer"], "additionalProperties": false }); - let schema_path = workspace.path().join("schema.json"); + let schema_path = test.cwd_path().join("schema.json"); std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?; let expected_schema: Value = schema_contents; @@ -36,14 +33,11 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> { ]); responses::mount_sse_once(&server, any(), body).await; - Command::cargo_bin("codex-exec")? - .current_dir(workspace.path()) - .env("CODEX_HOME", home.path()) - .env("OPENAI_API_KEY", "dummy") - .env("OPENAI_BASE_URL", format!("{}/v1", server.uri())) + test.cmd_with_server(&server) .arg("--skip-git-repo-check") + // keep using -C in the test to exercise the flag as well .arg("-C") - .arg(workspace.path()) + .arg(test.cwd_path()) .arg("--output-schema") .arg(&schema_path) .arg("-m")