Add codex exec testing helpers (#4254)
Add a shortcut to create working directories and run codex exec with fake server.
This commit is contained in:
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -1107,6 +1107,7 @@ name = "core_test_support"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
"codex-core",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
|
||||
@@ -8,6 +8,7 @@ path = "lib.rs"
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
assert_cmd = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
|
||||
@@ -9,6 +9,7 @@ use codex_core::config::ConfigToml;
|
||||
|
||||
pub mod responses;
|
||||
pub mod test_codex;
|
||||
pub mod test_codex_exec;
|
||||
|
||||
/// Returns a default `Config` whose on-disk state is confined to the provided
|
||||
/// temporary directory. Using a per-test directory keeps tests hermetic and
|
||||
|
||||
@@ -2,6 +2,7 @@ use serde_json::Value;
|
||||
use wiremock::BodyPrintLimit;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::Respond;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
@@ -131,3 +132,41 @@ pub async fn start_mock_server() -> MockServer {
|
||||
.start()
|
||||
.await
|
||||
}
|
||||
|
||||
/// Mounts a sequence of SSE response bodies and serves them in order for each
|
||||
/// POST to `/v1/responses`. Panics if more requests are received than bodies
|
||||
/// provided. Also asserts the exact number of expected calls.
|
||||
pub async fn mount_sse_sequence(server: &MockServer, bodies: Vec<String>) {
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
struct SeqResponder {
|
||||
num_calls: AtomicUsize,
|
||||
responses: Vec<String>,
|
||||
}
|
||||
|
||||
impl Respond for SeqResponder {
|
||||
fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
|
||||
let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
|
||||
match self.responses.get(call_num) {
|
||||
Some(body) => ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_string(body.clone()),
|
||||
None => panic!("no response for {call_num}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let num_calls = bodies.len();
|
||||
let responder = SeqResponder {
|
||||
num_calls: AtomicUsize::new(0),
|
||||
responses: bodies,
|
||||
};
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(responder)
|
||||
.expect(num_calls as u64)
|
||||
.mount(server)
|
||||
.await;
|
||||
}
|
||||
|
||||
40
codex-rs/core/tests/common/test_codex_exec.rs
Normal file
40
codex-rs/core/tests/common/test_codex_exec.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
#![allow(clippy::expect_used)]
|
||||
use std::path::Path;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::MockServer;
|
||||
|
||||
pub struct TestCodexExecBuilder {
|
||||
home: TempDir,
|
||||
cwd: TempDir,
|
||||
}
|
||||
|
||||
impl TestCodexExecBuilder {
|
||||
pub fn cmd(&self) -> assert_cmd::Command {
|
||||
let mut cmd = assert_cmd::Command::cargo_bin("codex-exec")
|
||||
.expect("should find binary for codex-exec");
|
||||
cmd.current_dir(self.cwd.path())
|
||||
.env("CODEX_HOME", self.home.path())
|
||||
.env("OPENAI_API_KEY", "dummy");
|
||||
cmd
|
||||
}
|
||||
pub fn cmd_with_server(&self, server: &MockServer) -> assert_cmd::Command {
|
||||
let mut cmd = self.cmd();
|
||||
let base = format!("{}/v1", server.uri());
|
||||
cmd.env("OPENAI_BASE_URL", base);
|
||||
cmd
|
||||
}
|
||||
|
||||
pub fn cwd_path(&self) -> &Path {
|
||||
self.cwd.path()
|
||||
}
|
||||
pub fn home_path(&self) -> &Path {
|
||||
self.home.path()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn test_codex_exec() -> TestCodexExecBuilder {
|
||||
TestCodexExecBuilder {
|
||||
home: TempDir::new().expect("create temp home"),
|
||||
cwd: TempDir::new().expect("create temp cwd"),
|
||||
}
|
||||
}
|
||||
@@ -6,7 +6,9 @@ use codex_core::CODEX_APPLY_PATCH_ARG1;
|
||||
use core_test_support::responses::ev_apply_patch_custom_tool_call;
|
||||
use core_test_support::responses::ev_apply_patch_function_call;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::responses::start_mock_server;
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
use tempfile::tempdir;
|
||||
@@ -47,13 +49,13 @@ fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn test_apply_patch_tool() -> anyhow::Result<()> {
|
||||
use crate::suite::common::run_e2e_exec_test;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let tmp_cwd = tempdir().expect("failed to create temp dir");
|
||||
let tmp_path = tmp_cwd.path().to_path_buf();
|
||||
let test = test_codex_exec();
|
||||
let tmp_path = test.cwd_path().to_path_buf();
|
||||
let add_patch = r#"*** Begin Patch
|
||||
*** Add File: test.md
|
||||
+Hello world
|
||||
@@ -75,7 +77,16 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
|
||||
]),
|
||||
sse(vec![ev_completed("request_2")]),
|
||||
];
|
||||
run_e2e_exec_test(tmp_cwd.path(), response_streams).await;
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_sequence(&server, response_streams).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-s")
|
||||
.arg("danger-full-access")
|
||||
.arg("foo")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
let final_path = tmp_path.join("test.md");
|
||||
let contents = std::fs::read_to_string(&final_path)
|
||||
@@ -87,12 +98,12 @@ async fn test_apply_patch_tool() -> anyhow::Result<()> {
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
|
||||
use crate::suite::common::run_e2e_exec_test;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let tmp_cwd = tempdir().expect("failed to create temp dir");
|
||||
let test = test_codex_exec();
|
||||
let freeform_add_patch = r#"*** Begin Patch
|
||||
*** Add File: app.py
|
||||
+class BaseClass:
|
||||
@@ -117,10 +128,19 @@ async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
|
||||
]),
|
||||
sse(vec![ev_completed("request_2")]),
|
||||
];
|
||||
run_e2e_exec_test(tmp_cwd.path(), response_streams).await;
|
||||
let server = start_mock_server().await;
|
||||
mount_sse_sequence(&server, response_streams).await;
|
||||
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-s")
|
||||
.arg("danger-full-access")
|
||||
.arg("foo")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// Verify final file contents
|
||||
let final_path = tmp_cwd.path().join("app.py");
|
||||
let final_path = test.cwd_path().join("app.py");
|
||||
let contents = std::fs::read_to_string(&final_path)
|
||||
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
|
||||
assert_eq!(
|
||||
|
||||
@@ -1,69 +0,0 @@
|
||||
// this file is only used for e2e tests which are currently disabled on windows
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used)]
|
||||
|
||||
use anyhow::Context;
|
||||
use assert_cmd::prelude::*;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
use wiremock::Respond;
|
||||
|
||||
struct SeqResponder {
|
||||
num_calls: AtomicUsize,
|
||||
responses: Vec<String>,
|
||||
}
|
||||
|
||||
impl Respond for SeqResponder {
|
||||
fn respond(&self, _: &wiremock::Request) -> wiremock::ResponseTemplate {
|
||||
let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
|
||||
match self.responses.get(call_num) {
|
||||
Some(body) => wiremock::ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_string(body.clone()),
|
||||
None => panic!("no response for {call_num}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to run an E2E test of a codex-exec call. Starts a wiremock
|
||||
/// server, and returns the response_streams in order for each api call. Runs
|
||||
/// the codex-exec command with the wiremock server as the model server.
|
||||
pub(crate) async fn run_e2e_exec_test(cwd: &Path, response_streams: Vec<String>) {
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let num_calls = response_streams.len();
|
||||
let seq_responder = SeqResponder {
|
||||
num_calls: AtomicUsize::new(0),
|
||||
responses: response_streams,
|
||||
};
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(seq_responder)
|
||||
.expect(num_calls as u64)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let cwd = cwd.to_path_buf();
|
||||
let uri = server.uri();
|
||||
Command::cargo_bin("codex-exec")
|
||||
.context("should find binary for codex-exec")
|
||||
.expect("should find binary for codex-exec")
|
||||
.current_dir(cwd.clone())
|
||||
.env("CODEX_HOME", cwd)
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("OPENAI_BASE_URL", format!("{uri}/v1"))
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-s")
|
||||
.arg("danger-full-access")
|
||||
.arg("foo")
|
||||
.assert()
|
||||
.success();
|
||||
}
|
||||
@@ -1,6 +1,5 @@
|
||||
// Aggregates all former standalone integration tests as modules.
|
||||
mod apply_patch;
|
||||
mod common;
|
||||
mod output_schema;
|
||||
mod resume;
|
||||
mod sandbox;
|
||||
|
||||
@@ -1,17 +1,14 @@
|
||||
#![cfg(not(target_os = "windows"))]
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use assert_cmd::prelude::*;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::test_codex_exec::test_codex_exec;
|
||||
use serde_json::Value;
|
||||
use std::process::Command;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::matchers::any;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
|
||||
let home = TempDir::new()?;
|
||||
let workspace = TempDir::new()?;
|
||||
let test = test_codex_exec();
|
||||
|
||||
let schema_contents = serde_json::json!({
|
||||
"type": "object",
|
||||
@@ -21,7 +18,7 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
|
||||
"required": ["answer"],
|
||||
"additionalProperties": false
|
||||
});
|
||||
let schema_path = workspace.path().join("schema.json");
|
||||
let schema_path = test.cwd_path().join("schema.json");
|
||||
std::fs::write(&schema_path, serde_json::to_vec_pretty(&schema_contents)?)?;
|
||||
let expected_schema: Value = schema_contents;
|
||||
|
||||
@@ -36,14 +33,11 @@ async fn exec_includes_output_schema_in_request() -> anyhow::Result<()> {
|
||||
]);
|
||||
responses::mount_sse_once(&server, any(), body).await;
|
||||
|
||||
Command::cargo_bin("codex-exec")?
|
||||
.current_dir(workspace.path())
|
||||
.env("CODEX_HOME", home.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
|
||||
test.cmd_with_server(&server)
|
||||
.arg("--skip-git-repo-check")
|
||||
// keep using -C in the test to exercise the flag as well
|
||||
.arg("-C")
|
||||
.arg(workspace.path())
|
||||
.arg(test.cwd_path())
|
||||
.arg("--output-schema")
|
||||
.arg(&schema_path)
|
||||
.arg("-m")
|
||||
|
||||
Reference in New Issue
Block a user