Add codex exec testing helpers (#4254)

Add a shortcut to create working directories and run codex exec with
fake server.
This commit is contained in:
pakrym-oai
2025-09-25 17:12:45 -07:00
committed by GitHub
parent 9f2ab97fbc
commit 8e3a048fec
9 changed files with 116 additions and 90 deletions

1
codex-rs/Cargo.lock generated
View File

@@ -1107,6 +1107,7 @@ name = "core_test_support"
version = "0.0.0"
dependencies = [
"anyhow",
"assert_cmd",
"codex-core",
"serde_json",
"tempfile",

View File

@@ -8,6 +8,7 @@ path = "lib.rs"
[dependencies]
anyhow = { workspace = true }
assert_cmd = { workspace = true }
codex-core = { workspace = true }
serde_json = { workspace = true }
tempfile = { workspace = true }

View File

@@ -9,6 +9,7 @@ use codex_core::config::ConfigToml;
pub mod responses;
pub mod test_codex;
pub mod test_codex_exec;
/// Returns a default `Config` whose on-disk state is confined to the provided
/// temporary directory. Using a per-test directory keeps tests hermetic and

View File

@@ -2,6 +2,7 @@ use serde_json::Value;
use wiremock::BodyPrintLimit;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::Respond;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
@@ -131,3 +132,41 @@ pub async fn start_mock_server() -> MockServer {
.start()
.await
}
/// Mounts a sequence of SSE response bodies and serves them in order for each
/// POST to `/v1/responses`. Panics if more requests are received than bodies
/// provided. Also asserts the exact number of expected calls.
pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) {
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
struct SeqResponder {
num_calls: AtomicUsize,
responses: Vec<String>,
}
impl Respond for SeqResponder {
fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
match self.responses.get(call_num) {
Some(body) => ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_string(body.clone()),
None => panic!("no response for {call_num}"),
}
}
}
let num_calls = bodies.len();
let responder = SeqResponder {
num_calls: AtomicUsize::new(0),
responses: bodies,
};
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(responder)
.expect(num_calls as u64)
.mount(server)
.await;
}

View File

@@ -0,0 +1,40 @@
#![allow(clippy::expect_used)]
use std::path::Path;
use tempfile::TempDir;
use wiremock::MockServer;
pub struct TestCodexExecBuilder {
home: TempDir,
cwd: TempDir,
}
impl TestCodexExecBuilder {
pub fn cmd(&self) -> assert_cmd::Command {
let mut cmd = assert_cmd::Command::cargo_bin("codex-exec")
.expect("should find binary for codex-exec");
cmd.current_dir(self.cwd.path())
.env("CODEX_HOME", self.home.path())
.env("OPENAI_API_KEY", "dummy");
cmd
}
pub fn cmd_with_server(&self, server: &MockServer) -> assert_cmd::Command {
let mut cmd = self.cmd();
let base = format!("{}/v1", server.uri());
cmd.env("OPENAI_BASE_URL", base);
cmd
}
pub fn cwd_path(&self) -> &Path {
self.cwd.path()
}
pub fn home_path(&self) -> &Path {
self.home.path()
}
}
pub fn test_codex_exec() -> TestCodexExecBuilder {
TestCodexExecBuilder {
home: TempDir::new().expect("create temp home"),
cwd: TempDir::new().expect("create temp cwd"),
}
}

View File

@@ -6,7 +6,9 @@ use codex_core::CODEX_APPLY_PATCH_ARG1;
use core_test_support::responses::ev_apply_patch_custom_tool_call;
use core_test_support::responses::ev_apply_patch_function_call;
use core_test_support::responses::ev_completed;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::responses::start_mock_server;
use std::fs;
use std::process::Command;
use tempfile::tempdir;
@@ -47,13 +49,13 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
#[cfg(not(target_os = "windows"))]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_apply_patch_tool() -> anyhow::Result<()> {
use crate::suite::common::run_e2e_exec_test;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex_exec::test_codex_exec;
skip_if_no_network!(Ok(()));
let tmp_cwd = tempdir().expect("failed to create temp dir");
let tmp_path = tmp_cwd.path().to_path_buf();
let test = test_codex_exec();
let tmp_path = test.cwd_path().to_path_buf();
let add_patch = r#"*** Begin Patch
*** Add File: test.md
+Hello world
@@ -75,7 +77,16 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
]),
sse(vec![ev_completed("request_2")]),
];
run_e2e_exec_test(tmp_cwd.path(), response_streams).await;
let server = start_mock_server().await;
mount_sse_sequence(&server, response_streams).await;
test.cmd_with_server(&server)
.arg("--skip-git-repo-check")
.arg("-s")
.arg("danger-full-access")
.arg("foo")
.assert()
.success();
let final_path = tmp_path.join("test.md");
let contents = std::fs::read_to_string(&final_path)
@@ -87,12 +98,12 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
#[cfg(not(target_os = "windows"))]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
use crate::suite::common::run_e2e_exec_test;
use core_test_support::skip_if_no_network;
use core_test_support::test_codex_exec::test_codex_exec;
skip_if_no_network!(Ok(()));
let tmp_cwd = tempdir().expect("failed to create temp dir");
let test = test_codex_exec();
let freeform_add_patch = r#"*** Begin Patch
*** Add File: app.py
+class BaseClass:
@@ -117,10 +128,19 @@ async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
]),
sse(vec![ev_completed("request_2")]),
];
run_e2e_exec_test(tmp_cwd.path(), response_streams).await;
let server = start_mock_server().await;
mount_sse_sequence(&server, response_streams).await;
test.cmd_with_server(&server)
.arg("--skip-git-repo-check")
.arg("-s")
.arg("danger-full-access")
.arg("foo")
.assert()
.success();
// Verify final file contents
let final_path = tmp_cwd.path().join("app.py");
let final_path = test.cwd_path().join("app.py");
let contents = std::fs::read_to_string(&final_path)
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
assert_eq!(

View File

@@ -1,69 +0,0 @@
// this file is only used for e2e tests which are currently disabled on windows
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used)]
use anyhow::Context;
use assert_cmd::prelude::*;
use std::path::Path;
use std::process::Command;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::matchers::method;
use wiremock::matchers::path;
use wiremock::Respond;
struct SeqResponder {
num_calls: AtomicUsize,
responses: Vec<String>,
}
impl Respond for SeqResponder {
fn respond(&self, _: &wiremock::Request) -> wiremock::ResponseTemplate {
let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
match self.responses.get(call_num) {
Some(body) => wiremock::ResponseTemplate::new(200)
.insert_header("content-type", "text/event-stream")
.set_body_string(body.clone()),
None => panic!("no response for {call_num}"),
}
}
}
/// Helper function to run an E2E test of a codex-exec call. Starts a wiremock
/// server, and returns the response_streams in order for each api call. Runs
/// the codex-exec command with the wiremock server as the model server.
pub(crate) async fn run_e2e_exec_test(cwd: &Path, response_streams: Vec<String>) {
let server = MockServer::start().await;
let num_calls = response_streams.len();
let seq_responder = SeqResponder {
num_calls: AtomicUsize::new(0),
responses: response_streams,
};
Mock::given(method("POST"))
.and(path("/v1/responses"))
.respond_with(seq_responder)
.expect(num_calls as u64)
.mount(&server)
.await;
let cwd = cwd.to_path_buf();
let uri = server.uri();
Command::cargo_bin("codex-exec")
.context("should find binary for codex-exec")
.expect("should find binary for codex-exec")
.current_dir(cwd.clone())
.env("CODEX_HOME", cwd)
.env("OPENAI_API_KEY", "dummy")
.env("OPENAI_BASE_URL", format!("{uri}/v1"))
.arg("--skip-git-repo-check")
.arg("-s")
.arg("danger-full-access")
.arg("foo")
.assert()
.success();
}

View File

@@ -1,6 +1,5 @@
// Aggregates all former standalone integration tests as modules.
mod apply_patch;
mod common;
mod output_schema;
mod resume;
mod sandbox;

View File

@@ -1,17 +1,14 @@
#![cfg(not(target_os = "windows"))]
#![allow(clippy::expect_used, clippy::unwrap_used)]
use assert_cmd::prelude::*;
use core_test_support::responses;
use core_test_support::test_codex_exec::test_codex_exec;
use serde_json::Value;
use std::process::Command;
use tempfile::TempDir;
use wiremock::matchers::any;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
let home = TempDir::new()?;
let workspace = TempDir::new()?;
let test = test_codex_exec();
let schema_contents = serde_json::json!({
"type": "object",
@@ -21,7 +18,7 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
"required": ["answer"],
"additionalProperties": false
});
let schema_path = workspace.path().join("schema.json");
let schema_path = test.cwd_path().join("schema.json");
std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?;
let expected_schema: Value = schema_contents;
@@ -36,14 +33,11 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
]);
responses::mount_sse_once(&server, any(), body).await;
Command::cargo_bin("codex-exec")?
.current_dir(workspace.path())
.env("CODEX_HOME", home.path())
.env("OPENAI_API_KEY", "dummy")
.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
test.cmd_with_server(&server)
.arg("--skip-git-repo-check")
// keep using -C in the test to exercise the flag as well
.arg("-C")
.arg(workspace.path())
.arg(test.cwd_path())
.arg("--output-schema")
.arg(&schema_path)
.arg("-m")