diff --git a/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt b/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt new file mode 100644 index 00000000..b01923e0 --- /dev/null +++ b/codex-rs/exec/tests/fixtures/apply_patch_freeform_final.txt @@ -0,0 +1,4 @@ +class BaseClass: + def method(): + + return True diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json new file mode 100644 index 00000000..8d2bf261 --- /dev/null +++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_add.json @@ -0,0 +1,25 @@ +[ + { + "type": "response.output_item.done", + "item": { + "type": "custom_tool_call", + "name": "apply_patch", + "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch", + "call_id": "__ID__" + } + }, + { + "type": "response.completed", + "response": { + "id": "__ID__", + "usage": { + "input_tokens": 0, + "input_tokens_details": null, + "output_tokens": 0, + "output_tokens_details": null, + "total_tokens": 0 + }, + "output": [] + } + } +] diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json new file mode 100644 index 00000000..ce05e7d4 --- /dev/null +++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_add.json @@ -0,0 +1,25 @@ +[ + { + "type": "response.output_item.done", + "item": { + "type": "custom_tool_call", + "name": "apply_patch", + "input": "*** Begin Patch\n*** Add File: app.py\n+class BaseClass:\n+ def method():\n+ return False\n*** End Patch", + "call_id": "__ID__" + } + }, + { + "type": "response.completed", + "response": { + "id": "__ID__", + "usage": { + "input_tokens": 0, + "input_tokens_details": null, + "output_tokens": 0, + "output_tokens_details": null, + "total_tokens": 0 + }, + "output": [] + } + } +] diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json new file mode 100644 index 00000000..8329d962 --- /dev/null +++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_freeform_update.json @@ -0,0 +1,25 @@ +[ + { + "type": "response.output_item.done", + "item": { + "type": "custom_tool_call", + "name": "apply_patch", + "input": "*** Begin Patch\n*** Update File: app.py\n@@ def method():\n- return False\n+\n+ return True\n*** End Patch", + "call_id": "__ID__" + } + }, + { + "type": "response.completed", + "response": { + "id": "__ID__", + "usage": { + "input_tokens": 0, + "input_tokens_details": null, + "output_tokens": 0, + "output_tokens_details": null, + "total_tokens": 0 + }, + "output": [] + } + } +] diff --git a/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json b/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json new file mode 100644 index 00000000..79689bec --- /dev/null +++ b/codex-rs/exec/tests/fixtures/sse_apply_patch_update.json @@ -0,0 +1,25 @@ +[ + { + "type": "response.output_item.done", + "item": { + "type": "function_call", + "name": "apply_patch", + "arguments": "{\n \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}", + "call_id": "__ID__" + } + }, + { + "type": "response.completed", + "response": { + "id": "__ID__", + "usage": { + "input_tokens": 0, + "input_tokens_details": null, + "output_tokens": 0, + "output_tokens_details": null, + "total_tokens": 0 + }, + "output": [] + } + } +] diff --git a/codex-rs/exec/tests/fixtures/sse_response_completed.json b/codex-rs/exec/tests/fixtures/sse_response_completed.json new file mode 100644 index 00000000..1774dc5e --- /dev/null +++ b/codex-rs/exec/tests/fixtures/sse_response_completed.json @@ -0,0 +1,16 @@ +[ + { + "type": "response.completed", + "response": { + "id": "__ID__", + "usage": { + "input_tokens": 0, + "input_tokens_details": null, + "output_tokens": 0, + "output_tokens_details": null, + "total_tokens": 0 + }, + "output": [] + } + } +] diff --git a/codex-rs/exec/tests/suite/apply_patch.rs b/codex-rs/exec/tests/suite/apply_patch.rs index f05bb732..5537853b 100644 --- a/codex-rs/exec/tests/suite/apply_patch.rs +++ b/codex-rs/exec/tests/suite/apply_patch.rs @@ -41,148 +41,31 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> { } #[cfg(not(target_os = "windows"))] -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_apply_patch_tool() -> anyhow::Result<()> { - use core_test_support::load_sse_fixture_with_id_from_str; - use tempfile::TempDir; - use wiremock::Mock; - use wiremock::MockServer; - use wiremock::ResponseTemplate; - use wiremock::matchers::method; - use wiremock::matchers::path; + use crate::suite::common::run_e2e_exec_test; + use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; - const SSE_TOOL_CALL_ADD: &str = r#"[ - { - "type": "response.output_item.done", - "item": { - "type": "function_call", - "name": "apply_patch", - "arguments": "{\n \"input\": \"*** Begin Patch\\n*** Add File: test.md\\n+Hello world\\n*** End Patch\"\n}", - "call_id": "__ID__" - } - }, - { - "type": "response.completed", - "response": { - "id": "__ID__", - "usage": { - "input_tokens": 0, - "input_tokens_details": null, - "output_tokens": 0, - "output_tokens_details": null, - "total_tokens": 0 - }, - "output": [] - } - } -]"#; - - const SSE_TOOL_CALL_UPDATE: &str = r#"[ - { - "type": "response.output_item.done", - "item": { - "type": "function_call", - "name": "apply_patch", - "arguments": "{\n \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}", - "call_id": "__ID__" - } - }, - { - "type": "response.completed", - "response": { - "id": "__ID__", - "usage": { - "input_tokens": 0, - "input_tokens_details": null, - "output_tokens": 0, - "output_tokens_details": null, - "total_tokens": 0 - }, - "output": [] - } - } -]"#; - - const SSE_TOOL_CALL_COMPLETED: &str = r#"[ - { - "type": "response.completed", - "response": { - "id": "__ID__", - "usage": { - "input_tokens": 0, - "input_tokens_details": null, - "output_tokens": 0, - "output_tokens_details": null, - "total_tokens": 0 - }, - "output": [] - } - } -]"#; - - // Start a mock model server - let server = MockServer::start().await; - - // First response: model calls apply_patch to create test.md - let first = ResponseTemplate::new(200) - .insert_header("content-type", "text/event-stream") - .set_body_raw( - load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"), - "text/event-stream", + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." ); + return Ok(()); + } - Mock::given(method("POST")) - // .and(path("/v1/responses")) - .respond_with(first) - .up_to_n_times(1) - .mount(&server) - .await; + let tmp_cwd = tempdir().expect("failed to create temp dir"); + let tmp_path = tmp_cwd.path().to_path_buf(); + run_e2e_exec_test( + tmp_cwd.path(), + vec![ + include_str!("../fixtures/sse_apply_patch_add.json").to_string(), + include_str!("../fixtures/sse_apply_patch_update.json").to_string(), + include_str!("../fixtures/sse_response_completed.json").to_string(), + ], + ) + .await; - // Second response: model calls apply_patch to update test.md - let second = ResponseTemplate::new(200) - .insert_header("content-type", "text/event-stream") - .set_body_raw( - load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"), - "text/event-stream", - ); - - Mock::given(method("POST")) - .and(path("/v1/responses")) - .respond_with(second) - .up_to_n_times(1) - .mount(&server) - .await; - - let final_completed = ResponseTemplate::new(200) - .insert_header("content-type", "text/event-stream") - .set_body_raw( - load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"), - "text/event-stream", - ); - - Mock::given(method("POST")) - .and(path("/v1/responses")) - .respond_with(final_completed) - .expect(1) - .mount(&server) - .await; - - let tmp_cwd = TempDir::new().unwrap(); - Command::cargo_bin("codex-exec") - .context("should find binary for codex-exec")? - .current_dir(tmp_cwd.path()) - .env("CODEX_HOME", tmp_cwd.path()) - .env("OPENAI_API_KEY", "dummy") - .env("OPENAI_BASE_URL", format!("{}/v1", server.uri())) - .arg("--skip-git-repo-check") - .arg("-s") - .arg("workspace-write") - .arg("foo") - .assert() - .success(); - - // Verify final file contents - let final_path = tmp_cwd.path().join("test.md"); + let final_path = tmp_path.join("test.md"); let contents = std::fs::read_to_string(&final_path) .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display())); assert_eq!(contents, "Final text\n"); @@ -190,150 +73,36 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> { } #[cfg(not(target_os = "windows"))] -#[tokio::test] +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> { - use core_test_support::load_sse_fixture_with_id_from_str; - use tempfile::TempDir; - use wiremock::Mock; - use wiremock::MockServer; - use wiremock::ResponseTemplate; - use wiremock::matchers::method; - use wiremock::matchers::path; + use crate::suite::common::run_e2e_exec_test; + use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; - const SSE_TOOL_CALL_ADD: &str = r#"[ - { - "type": "response.output_item.done", - "item": { - "type": "custom_tool_call", - "name": "apply_patch", - "input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch", - "call_id": "__ID__" - } - }, - { - "type": "response.completed", - "response": { - "id": "__ID__", - "usage": { - "input_tokens": 0, - "input_tokens_details": null, - "output_tokens": 0, - "output_tokens_details": null, - "total_tokens": 0 - }, - "output": [] - } - } -]"#; - - const SSE_TOOL_CALL_UPDATE: &str = r#"[ - { - "type": "response.output_item.done", - "item": { - "type": "custom_tool_call", - "name": "apply_patch", - "input": "*** Begin Patch\n*** Update File: test.md\n@@\n-Hello world\n+Final text\n*** End Patch", - "call_id": "__ID__" - } - }, - { - "type": "response.completed", - "response": { - "id": "__ID__", - "usage": { - "input_tokens": 0, - "input_tokens_details": null, - "output_tokens": 0, - "output_tokens_details": null, - "total_tokens": 0 - }, - "output": [] - } - } -]"#; - - const SSE_TOOL_CALL_COMPLETED: &str = r#"[ - { - "type": "response.completed", - "response": { - "id": "__ID__", - "usage": { - "input_tokens": 0, - "input_tokens_details": null, - "output_tokens": 0, - "output_tokens_details": null, - "total_tokens": 0 - }, - "output": [] - } - } -]"#; - - // Start a mock model server - let server = MockServer::start().await; - - // First response: model calls apply_patch to create test.md - let first = ResponseTemplate::new(200) - .insert_header("content-type", "text/event-stream") - .set_body_raw( - load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"), - "text/event-stream", + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." ); + return Ok(()); + } - Mock::given(method("POST")) - // .and(path("/v1/responses")) - .respond_with(first) - .up_to_n_times(1) - .mount(&server) - .await; - - // Second response: model calls apply_patch to update test.md - let second = ResponseTemplate::new(200) - .insert_header("content-type", "text/event-stream") - .set_body_raw( - load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"), - "text/event-stream", - ); - - Mock::given(method("POST")) - .and(path("/v1/responses")) - .respond_with(second) - .up_to_n_times(1) - .mount(&server) - .await; - - let final_completed = ResponseTemplate::new(200) - .insert_header("content-type", "text/event-stream") - .set_body_raw( - load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"), - "text/event-stream", - ); - - Mock::given(method("POST")) - // .and(path("/v1/responses")) - .respond_with(final_completed) - .expect(1) - .mount(&server) - .await; - - let tmp_cwd = TempDir::new().unwrap(); - Command::cargo_bin("codex-exec") - .context("should find binary for codex-exec")? - .current_dir(tmp_cwd.path()) - .env("CODEX_HOME", tmp_cwd.path()) - .env("OPENAI_API_KEY", "dummy") - .env("OPENAI_BASE_URL", format!("{}/v1", server.uri())) - .arg("--skip-git-repo-check") - .arg("-s") - .arg("workspace-write") - .arg("foo") - .assert() - .success(); + let tmp_cwd = tempdir().expect("failed to create temp dir"); + run_e2e_exec_test( + tmp_cwd.path(), + vec![ + include_str!("../fixtures/sse_apply_patch_freeform_add.json").to_string(), + include_str!("../fixtures/sse_apply_patch_freeform_update.json").to_string(), + include_str!("../fixtures/sse_response_completed.json").to_string(), + ], + ) + .await; // Verify final file contents - let final_path = tmp_cwd.path().join("test.md"); + let final_path = tmp_cwd.path().join("app.py"); let contents = std::fs::read_to_string(&final_path) .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display())); - assert_eq!(contents, "Final text\n"); + assert_eq!( + contents, + include_str!("../fixtures/apply_patch_freeform_final.txt") + ); Ok(()) } diff --git a/codex-rs/exec/tests/suite/common.rs b/codex-rs/exec/tests/suite/common.rs new file mode 100644 index 00000000..49747dca --- /dev/null +++ b/codex-rs/exec/tests/suite/common.rs @@ -0,0 +1,73 @@ +// this file is only used for e2e tests which are currently disabled on windows +#![cfg(not(target_os = "windows"))] +#![allow(clippy::expect_used)] + +use anyhow::Context; +use assert_cmd::prelude::*; +use core_test_support::load_sse_fixture_with_id_from_str; +use std::path::Path; +use std::process::Command; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::matchers::method; +use wiremock::matchers::path; + +use wiremock::Respond; + +struct SeqResponder { + num_calls: AtomicUsize, + responses: Vec, +} + +impl Respond for SeqResponder { + fn respond(&self, _: &wiremock::Request) -> wiremock::ResponseTemplate { + let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst); + match self.responses.get(call_num) { + Some(body) => wiremock::ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw( + load_sse_fixture_with_id_from_str(body, &format!("request_{}", call_num)), + "text/event-stream", + ), + None => panic!("no response for {call_num}"), + } + } +} + +/// Helper function to run an E2E test of a codex-exec call. Starts a wiremock +/// server, and returns the response_streams in order for each api call. Runs +/// the codex-exec command with the wiremock server as the model server. +pub(crate) async fn run_e2e_exec_test(cwd: &Path, response_streams: Vec) { + let server = MockServer::start().await; + + let num_calls = response_streams.len(); + let seq_responder = SeqResponder { + num_calls: AtomicUsize::new(0), + responses: response_streams, + }; + + Mock::given(method("POST")) + .and(path("/v1/responses")) + .respond_with(seq_responder) + .expect(num_calls as u64) + .mount(&server) + .await; + + let cwd = cwd.to_path_buf(); + let uri = server.uri(); + Command::cargo_bin("codex-exec") + .context("should find binary for codex-exec") + .expect("should find binary for codex-exec") + .current_dir(cwd.clone()) + .env("CODEX_HOME", cwd.clone()) + .env("OPENAI_API_KEY", "dummy") + .env("OPENAI_BASE_URL", format!("{}/v1", uri)) + .arg("--skip-git-repo-check") + .arg("-s") + .arg("danger-full-access") + .arg("foo") + .assert() + .success(); +} diff --git a/codex-rs/exec/tests/suite/mod.rs b/codex-rs/exec/tests/suite/mod.rs index 8a83474e..75b19ee1 100644 --- a/codex-rs/exec/tests/suite/mod.rs +++ b/codex-rs/exec/tests/suite/mod.rs @@ -1,3 +1,4 @@ // Aggregates all former standalone integration tests as modules. mod apply_patch; +mod common; mod sandbox;