test: faster test execution in codex-core (#2633)

This dramatically improves the time to run `cargo test -p codex-core` (~25x
speedup).

before:
```
cargo test -p codex-core  35.96s user 68.63s system 19% cpu 8:49.80 total
```

after:
```
cargo test -p codex-core  5.51s user 8.16s system 63% cpu 21.407 total
```

Both runs were measured "hot", i.e. on a second run with no filesystem
changes, to exclude compile times.

The approach is inspired by [Delete Cargo Integration
Tests](https://matklad.github.io/2021/02/27/delete-cargo-integration-tests.html):
we move all test cases in `tests/` into a single suite so that they compile
into a single test binary. Each test binary that is executed carries
significant overhead, and tests are only run in parallel within a single
binary, not across binaries.
This commit is contained in:
Jeremy Rose
2025-08-24 11:10:53 -07:00
committed by GitHub
parent c6a52d611c
commit 32bbbbad61
56 changed files with 78 additions and 3 deletions

View File

@@ -0,0 +1,3 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;

View File

@@ -0,0 +1,2 @@
// Aggregates all former standalone integration tests as modules.
mod apply_command_e2e;

View File

@@ -6,6 +6,7 @@ version = { workspace = true }
[lib] [lib]
name = "codex_core" name = "codex_core"
path = "src/lib.rs" path = "src/lib.rs"
doctest = false
[lints] [lints]
workspace = true workspace = true

View File

@@ -0,0 +1,3 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;

View File

@@ -0,0 +1,12 @@
// Aggregates all former standalone integration tests as modules.
mod cli_stream;
mod client;
mod compact;
mod exec;
mod exec_stream_events;
mod live_cli;
mod prompt_caching;
mod seatbelt;
mod stream_error_allows_next_turn;
mod stream_no_completed;

View File

@@ -107,8 +107,8 @@ async fn codex_mini_latest_tools() {
assert_eq!(requests.len(), 2, "expected two POST requests"); assert_eq!(requests.len(), 2, "expected two POST requests");
let expected_instructions = [ let expected_instructions = [
include_str!("../prompt.md"), include_str!("../../prompt.md"),
include_str!("../../apply-patch/apply_patch_tool_instructions.md"), include_str!("../../../apply-patch/apply_patch_tool_instructions.md"),
] ]
.join("\n"); .join("\n");
@@ -188,7 +188,7 @@ async fn prompt_tools_are_consistent_across_requests() {
let requests = server.received_requests().await.unwrap(); let requests = server.received_requests().await.unwrap();
assert_eq!(requests.len(), 2, "expected two POST requests"); assert_eq!(requests.len(), 2, "expected two POST requests");
let expected_instructions: &str = include_str!("../prompt.md"); let expected_instructions: &str = include_str!("../../prompt.md");
// our internal implementation is responsible for keeping tools in sync // our internal implementation is responsible for keeping tools in sync
// with the OpenAI schema, so we just verify the tool presence here // with the OpenAI schema, so we just verify the tool presence here
let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch"]; let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch"];

View File

@@ -0,0 +1,3 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;

View File

@@ -0,0 +1,3 @@
// Aggregates all former standalone integration tests as modules.
mod apply_patch;
mod sandbox;

View File

@@ -0,0 +1,3 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;

View File

@@ -0,0 +1,10 @@
// Aggregates all former standalone integration tests as modules.
mod bad;
mod cp;
mod good;
mod head;
mod literal;
mod ls;
mod parse_sed_command;
mod pwd;
mod sed;

View File

@@ -0,0 +1,3 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;

View File

@@ -0,0 +1,2 @@
// Aggregates all former standalone integration tests as modules.
mod landlock;

View File

@@ -0,0 +1,3 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;

View File

@@ -0,0 +1,2 @@
// Aggregates all former standalone integration tests as modules.
mod login_server_e2e;

View File

@@ -0,0 +1,3 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;

View File

@@ -0,0 +1,8 @@
// Aggregates all former standalone integration tests as modules.
mod auth;
mod codex_message_processor_flow;
mod codex_tool;
mod create_conversation;
mod interrupt;
mod login;
mod send_message;

View File

@@ -0,0 +1,3 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;

View File

@@ -0,0 +1,3 @@
// Aggregates all former standalone integration tests as modules.
mod initialize;
mod progress_notification;

View File

@@ -0,0 +1,3 @@
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;

View File

@@ -0,0 +1,5 @@
// Aggregates all former standalone integration tests as modules.
mod status_indicator;
mod vt100_history;
mod vt100_live_commit;
mod vt100_streaming_no_dup;