test: faster test execution in codex-core (#2633)
this dramatically improves time to run `cargo test -p codex-core` (~25x speedup). before: ``` cargo test -p codex-core 35.96s user 68.63s system 19% cpu 8:49.80 total ``` after: ``` cargo test -p codex-core 5.51s user 8.16s system 63% cpu 21.407 total ``` both tests measured "hot", i.e. on a 2nd run with no filesystem changes, to exclude compile times. approach inspired by [Delete Cargo Integration Tests](https://matklad.github.io/2021/02/27/delete-cargo-integration-tests.html), we move all test cases in tests/ into a single suite in order to have a single binary, as there is significant overhead for each test binary executed, and because test execution is only parallelized with a single binary.
This commit is contained in:
339
codex-rs/exec/tests/suite/apply_patch.rs
Normal file
339
codex-rs/exec/tests/suite/apply_patch.rs
Normal file
@@ -0,0 +1,339 @@
|
||||
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
||||
|
||||
use anyhow::Context;
|
||||
use assert_cmd::prelude::*;
|
||||
use codex_core::CODEX_APPLY_PATCH_ARG1;
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
use tempfile::tempdir;
|
||||
|
||||
/// While we may add an `apply-patch` subcommand to the `codex` CLI multitool
|
||||
/// at some point, we must ensure that the smaller `codex-exec` CLI can still
|
||||
/// emulate the `apply_patch` CLI.
|
||||
#[test]
|
||||
fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
|
||||
let tmp = tempdir()?;
|
||||
let relative_path = "source.txt";
|
||||
let absolute_path = tmp.path().join(relative_path);
|
||||
fs::write(&absolute_path, "original content\n")?;
|
||||
|
||||
Command::cargo_bin("codex-exec")
|
||||
.context("should find binary for codex-exec")?
|
||||
.arg(CODEX_APPLY_PATCH_ARG1)
|
||||
.arg(
|
||||
r#"*** Begin Patch
|
||||
*** Update File: source.txt
|
||||
@@
|
||||
-original content
|
||||
+modified by apply_patch
|
||||
*** End Patch"#,
|
||||
)
|
||||
.current_dir(tmp.path())
|
||||
.assert()
|
||||
.success()
|
||||
.stdout("Success. Updated the following files:\nM source.txt\n")
|
||||
.stderr(predicates::str::is_empty());
|
||||
assert_eq!(
|
||||
fs::read_to_string(absolute_path)?,
|
||||
"modified by apply_patch\n"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
#[tokio::test]
|
||||
async fn test_apply_patch_tool() -> anyhow::Result<()> {
|
||||
use core_test_support::load_sse_fixture_with_id_from_str;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
const SSE_TOOL_CALL_ADD: &str = r#"[
|
||||
{
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "function_call",
|
||||
"name": "apply_patch",
|
||||
"arguments": "{\n \"input\": \"*** Begin Patch\\n*** Add File: test.md\\n+Hello world\\n*** End Patch\"\n}",
|
||||
"call_id": "__ID__"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "__ID__",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 0
|
||||
},
|
||||
"output": []
|
||||
}
|
||||
}
|
||||
]"#;
|
||||
|
||||
const SSE_TOOL_CALL_UPDATE: &str = r#"[
|
||||
{
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "function_call",
|
||||
"name": "apply_patch",
|
||||
"arguments": "{\n \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
|
||||
"call_id": "__ID__"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "__ID__",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 0
|
||||
},
|
||||
"output": []
|
||||
}
|
||||
}
|
||||
]"#;
|
||||
|
||||
const SSE_TOOL_CALL_COMPLETED: &str = r#"[
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "__ID__",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 0
|
||||
},
|
||||
"output": []
|
||||
}
|
||||
}
|
||||
]"#;
|
||||
|
||||
// Start a mock model server
|
||||
let server = MockServer::start().await;
|
||||
|
||||
// First response: model calls apply_patch to create test.md
|
||||
let first = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(
|
||||
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
|
||||
"text/event-stream",
|
||||
);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
// .and(path("/v1/responses"))
|
||||
.respond_with(first)
|
||||
.up_to_n_times(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
// Second response: model calls apply_patch to update test.md
|
||||
let second = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(
|
||||
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
|
||||
"text/event-stream",
|
||||
);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(second)
|
||||
.up_to_n_times(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let final_completed = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(
|
||||
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
|
||||
"text/event-stream",
|
||||
);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(final_completed)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let tmp_cwd = TempDir::new().unwrap();
|
||||
Command::cargo_bin("codex-exec")
|
||||
.context("should find binary for codex-exec")?
|
||||
.current_dir(tmp_cwd.path())
|
||||
.env("CODEX_HOME", tmp_cwd.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-s")
|
||||
.arg("workspace-write")
|
||||
.arg("foo")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// Verify final file contents
|
||||
let final_path = tmp_cwd.path().join("test.md");
|
||||
let contents = std::fs::read_to_string(&final_path)
|
||||
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
|
||||
assert_eq!(contents, "Final text\n");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
#[tokio::test]
|
||||
async fn test_apply_patch_freeform_tool() -> anyhow::Result<()> {
|
||||
use core_test_support::load_sse_fixture_with_id_from_str;
|
||||
use tempfile::TempDir;
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
const SSE_TOOL_CALL_ADD: &str = r#"[
|
||||
{
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "custom_tool_call",
|
||||
"name": "apply_patch",
|
||||
"input": "*** Begin Patch\n*** Add File: test.md\n+Hello world\n*** End Patch",
|
||||
"call_id": "__ID__"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "__ID__",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 0
|
||||
},
|
||||
"output": []
|
||||
}
|
||||
}
|
||||
]"#;
|
||||
|
||||
const SSE_TOOL_CALL_UPDATE: &str = r#"[
|
||||
{
|
||||
"type": "response.output_item.done",
|
||||
"item": {
|
||||
"type": "custom_tool_call",
|
||||
"name": "apply_patch",
|
||||
"input": "*** Begin Patch\n*** Update File: test.md\n@@\n-Hello world\n+Final text\n*** End Patch",
|
||||
"call_id": "__ID__"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "__ID__",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 0
|
||||
},
|
||||
"output": []
|
||||
}
|
||||
}
|
||||
]"#;
|
||||
|
||||
const SSE_TOOL_CALL_COMPLETED: &str = r#"[
|
||||
{
|
||||
"type": "response.completed",
|
||||
"response": {
|
||||
"id": "__ID__",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"input_tokens_details": null,
|
||||
"output_tokens": 0,
|
||||
"output_tokens_details": null,
|
||||
"total_tokens": 0
|
||||
},
|
||||
"output": []
|
||||
}
|
||||
}
|
||||
]"#;
|
||||
|
||||
// Start a mock model server
|
||||
let server = MockServer::start().await;
|
||||
|
||||
// First response: model calls apply_patch to create test.md
|
||||
let first = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(
|
||||
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
|
||||
"text/event-stream",
|
||||
);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
// .and(path("/v1/responses"))
|
||||
.respond_with(first)
|
||||
.up_to_n_times(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
// Second response: model calls apply_patch to update test.md
|
||||
let second = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(
|
||||
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
|
||||
"text/event-stream",
|
||||
);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/responses"))
|
||||
.respond_with(second)
|
||||
.up_to_n_times(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let final_completed = ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(
|
||||
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
|
||||
"text/event-stream",
|
||||
);
|
||||
|
||||
Mock::given(method("POST"))
|
||||
// .and(path("/v1/responses"))
|
||||
.respond_with(final_completed)
|
||||
.expect(1)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
let tmp_cwd = TempDir::new().unwrap();
|
||||
Command::cargo_bin("codex-exec")
|
||||
.context("should find binary for codex-exec")?
|
||||
.current_dir(tmp_cwd.path())
|
||||
.env("CODEX_HOME", tmp_cwd.path())
|
||||
.env("OPENAI_API_KEY", "dummy")
|
||||
.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
|
||||
.arg("--skip-git-repo-check")
|
||||
.arg("-s")
|
||||
.arg("workspace-write")
|
||||
.arg("foo")
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
// Verify final file contents
|
||||
let final_path = tmp_cwd.path().join("test.md");
|
||||
let contents = std::fs::read_to_string(&final_path)
|
||||
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
|
||||
assert_eq!(contents, "Final text\n");
|
||||
Ok(())
|
||||
}
|
||||
3
codex-rs/exec/tests/suite/mod.rs
Normal file
3
codex-rs/exec/tests/suite/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
// Aggregates all former standalone integration tests as modules.
|
||||
mod apply_patch;
|
||||
mod sandbox;
|
||||
219
codex-rs/exec/tests/suite/sandbox.rs
Normal file
219
codex-rs/exec/tests/suite/sandbox.rs
Normal file
@@ -0,0 +1,219 @@
|
||||
#![cfg(unix)]
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::spawn::StdioPolicy;
|
||||
use std::collections::HashMap;
|
||||
use std::future::Future;
|
||||
use std::io;
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitStatus;
|
||||
use tokio::process::Child;
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
async fn spawn_command_under_sandbox(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: PathBuf,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
use codex_core::seatbelt::spawn_command_under_seatbelt;
|
||||
spawn_command_under_seatbelt(command, sandbox_policy, cwd, stdio_policy, env).await
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
async fn spawn_command_under_sandbox(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: PathBuf,
|
||||
stdio_policy: StdioPolicy,
|
||||
env: HashMap<String, String>,
|
||||
) -> std::io::Result<Child> {
|
||||
use codex_core::landlock::spawn_command_under_linux_sandbox;
|
||||
let codex_linux_sandbox_exe = assert_cmd::cargo::cargo_bin("codex-exec");
|
||||
spawn_command_under_linux_sandbox(
|
||||
codex_linux_sandbox_exe,
|
||||
command,
|
||||
sandbox_policy,
|
||||
cwd,
|
||||
stdio_policy,
|
||||
env,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn python_multiprocessing_lock_works_under_sandbox() {
|
||||
#[cfg(target_os = "macos")]
|
||||
let writable_roots = Vec::<PathBuf>::new();
|
||||
|
||||
// From https://man7.org/linux/man-pages/man7/sem_overview.7.html
|
||||
//
|
||||
// > On Linux, named semaphores are created in a virtual filesystem,
|
||||
// > normally mounted under /dev/shm.
|
||||
#[cfg(target_os = "linux")]
|
||||
let writable_roots = vec![PathBuf::from("/dev/shm")];
|
||||
|
||||
let policy = SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots,
|
||||
network_access: false,
|
||||
exclude_tmpdir_env_var: false,
|
||||
exclude_slash_tmp: false,
|
||||
};
|
||||
|
||||
let python_code = r#"import multiprocessing
|
||||
from multiprocessing import Lock, Process
|
||||
|
||||
def f(lock):
|
||||
with lock:
|
||||
print("Lock acquired in child process")
|
||||
|
||||
if __name__ == '__main__':
|
||||
lock = Lock()
|
||||
p = Process(target=f, args=(lock,))
|
||||
p.start()
|
||||
p.join()
|
||||
"#;
|
||||
|
||||
let mut child = spawn_command_under_sandbox(
|
||||
vec![
|
||||
"python3".to_string(),
|
||||
"-c".to_string(),
|
||||
python_code.to_string(),
|
||||
],
|
||||
&policy,
|
||||
std::env::current_dir().expect("should be able to get current dir"),
|
||||
StdioPolicy::Inherit,
|
||||
HashMap::new(),
|
||||
)
|
||||
.await
|
||||
.expect("should be able to spawn python under sandbox");
|
||||
|
||||
let status = child.wait().await.expect("should wait for child process");
|
||||
assert!(status.success(), "python exited with {status:?}");
|
||||
}
|
||||
|
||||
fn unix_sock_body() {
|
||||
unsafe {
|
||||
let mut fds = [0i32; 2];
|
||||
let r = libc::socketpair(libc::AF_UNIX, libc::SOCK_DGRAM, 0, fds.as_mut_ptr());
|
||||
assert_eq!(
|
||||
r,
|
||||
0,
|
||||
"socketpair(AF_UNIX, SOCK_DGRAM) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
|
||||
let msg = b"hello_unix";
|
||||
// write() from one end (generic write is allowed)
|
||||
let sent = libc::write(fds[0], msg.as_ptr() as *const libc::c_void, msg.len());
|
||||
assert!(sent >= 0, "write() failed: {}", io::Error::last_os_error());
|
||||
|
||||
// recvfrom() on the other end. We don’t need the address for socketpair,
|
||||
// so we pass null pointers for src address.
|
||||
let mut buf = [0u8; 64];
|
||||
let recvd = libc::recvfrom(
|
||||
fds[1],
|
||||
buf.as_mut_ptr() as *mut libc::c_void,
|
||||
buf.len(),
|
||||
0,
|
||||
std::ptr::null_mut(),
|
||||
std::ptr::null_mut(),
|
||||
);
|
||||
assert!(
|
||||
recvd >= 0,
|
||||
"recvfrom() failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
|
||||
let recvd_slice = &buf[..(recvd as usize)];
|
||||
assert_eq!(
|
||||
recvd_slice,
|
||||
&msg[..],
|
||||
"payload mismatch: sent {} bytes, got {} bytes",
|
||||
msg.len(),
|
||||
recvd
|
||||
);
|
||||
|
||||
// Also exercise AF_UNIX stream socketpair quickly to ensure AF_UNIX in general works.
|
||||
let mut sfds = [0i32; 2];
|
||||
let sr = libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, sfds.as_mut_ptr());
|
||||
assert_eq!(
|
||||
sr,
|
||||
0,
|
||||
"socketpair(AF_UNIX, SOCK_STREAM) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
let snt2 = libc::write(sfds[0], msg.as_ptr() as *const libc::c_void, msg.len());
|
||||
assert!(
|
||||
snt2 >= 0,
|
||||
"write(stream) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
let mut b2 = [0u8; 64];
|
||||
let rcv2 = libc::recv(sfds[1], b2.as_mut_ptr() as *mut libc::c_void, b2.len(), 0);
|
||||
assert!(
|
||||
rcv2 >= 0,
|
||||
"recv(stream) failed: {}",
|
||||
io::Error::last_os_error()
|
||||
);
|
||||
|
||||
// Clean up
|
||||
let _ = libc::close(sfds[0]);
|
||||
let _ = libc::close(sfds[1]);
|
||||
let _ = libc::close(fds[0]);
|
||||
let _ = libc::close(fds[1]);
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn allow_unix_socketpair_recvfrom() {
|
||||
run_code_under_sandbox(
|
||||
"allow_unix_socketpair_recvfrom",
|
||||
&SandboxPolicy::ReadOnly,
|
||||
|| async { unix_sock_body() },
|
||||
)
|
||||
.await
|
||||
.expect("should be able to reexec");
|
||||
}
|
||||
|
||||
const IN_SANDBOX_ENV_VAR: &str = "IN_SANDBOX";
|
||||
|
||||
#[expect(clippy::expect_used)]
|
||||
pub async fn run_code_under_sandbox<F, Fut>(
|
||||
test_selector: &str,
|
||||
policy: &SandboxPolicy,
|
||||
child_body: F,
|
||||
) -> io::Result<Option<ExitStatus>>
|
||||
where
|
||||
F: FnOnce() -> Fut + Send + 'static,
|
||||
Fut: Future<Output = ()> + Send + 'static,
|
||||
{
|
||||
if std::env::var(IN_SANDBOX_ENV_VAR).is_err() {
|
||||
let exe = std::env::current_exe()?;
|
||||
let mut cmds = vec![exe.to_string_lossy().into_owned(), "--exact".into()];
|
||||
let mut stdio_policy = StdioPolicy::RedirectForShellTool;
|
||||
// Allow for us to pass forward --nocapture / use the right stdio policy.
|
||||
if std::env::args().any(|a| a == "--nocapture") {
|
||||
cmds.push("--nocapture".into());
|
||||
stdio_policy = StdioPolicy::Inherit;
|
||||
}
|
||||
cmds.push(test_selector.into());
|
||||
|
||||
// Your existing launcher:
|
||||
let mut child = spawn_command_under_sandbox(
|
||||
cmds,
|
||||
policy,
|
||||
std::env::current_dir().expect("should be able to get current dir"),
|
||||
stdio_policy,
|
||||
HashMap::from([("IN_SANDBOX".into(), "1".into())]),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let status = child.wait().await?;
|
||||
Ok(Some(status))
|
||||
} else {
|
||||
// Child branch: run the provided body.
|
||||
child_body().await;
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user