diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 6b4353d8..0a650511 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1575,6 +1575,7 @@ dependencies = [ "anyhow", "assert_cmd", "codex-core", + "regex-lite", "serde_json", "tempfile", "tokio", diff --git a/codex-rs/core/tests/common/Cargo.toml b/codex-rs/core/tests/common/Cargo.toml index ed8bc93a..6ecc5493 100644 --- a/codex-rs/core/tests/common/Cargo.toml +++ b/codex-rs/core/tests/common/Cargo.toml @@ -10,6 +10,7 @@ path = "lib.rs" anyhow = { workspace = true } assert_cmd = { workspace = true } codex-core = { workspace = true } +regex-lite = { workspace = true } serde_json = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true, features = ["time"] } diff --git a/codex-rs/core/tests/common/lib.rs b/codex-rs/core/tests/common/lib.rs index 500c5955..2c012b9b 100644 --- a/codex-rs/core/tests/common/lib.rs +++ b/codex-rs/core/tests/common/lib.rs @@ -6,6 +6,7 @@ use codex_core::CodexConversation; use codex_core::config::Config; use codex_core::config::ConfigOverrides; use codex_core::config::ConfigToml; +use regex_lite::Regex; #[cfg(target_os = "linux")] use assert_cmd::cargo::cargo_bin; @@ -14,6 +15,16 @@ pub mod responses; pub mod test_codex; pub mod test_codex_exec; +#[track_caller] +pub fn assert_regex_match<'s>(pattern: &str, actual: &'s str) -> regex_lite::Captures<'s> { + let regex = Regex::new(pattern).unwrap_or_else(|err| { + panic!("failed to compile regex {pattern:?}: {err}"); + }); + regex + .captures(actual) + .unwrap_or_else(|| panic!("regex {pattern:?} did not match {actual:?}")) +} + /// Returns a default `Config` whose on-disk state is confined to the provided /// temporary directory. Using a per-test directory keeps tests hermetic and /// avoids clobbering a developer’s real `~/.codex`. diff --git a/codex-rs/core/tests/suite/shell_serialization.rs b/codex-rs/core/tests/suite/shell_serialization.rs index 8e4a4757..4e29215f 100644 --- a/codex-rs/core/tests/suite/shell_serialization.rs +++ b/codex-rs/core/tests/suite/shell_serialization.rs @@ -8,6 +8,7 @@ use codex_core::protocol::InputItem; use codex_core::protocol::Op; use codex_core::protocol::SandboxPolicy; use codex_protocol::config_types::ReasoningSummary; +use core_test_support::assert_regex_match; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_function_call; @@ -131,10 +132,7 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> { .get("output") .and_then(Value::as_str) .unwrap_or_default(); - assert!( - stdout.contains("shell json"), - "expected stdout to include command output, got {stdout:?}" - ); + assert_regex_match(r"(?s)^shell json\n?$", stdout); Ok(()) } @@ -190,18 +188,12 @@ async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> { serde_json::from_str::(output).is_err(), "expected structured shell output to be plain text", ); - assert!( - output.starts_with("Exit code: 0\n"), - "expected exit code prefix, got {output:?}", - ); - assert!( - output.contains("\nOutput:\n"), - "expected Output section, got {output:?}" - ); - assert!( - output.contains("freeform shell"), - "expected stdout content, got {output:?}" - ); + let expected_pattern = r"(?s)^Exit code: 0 +Wall time: [0-9]+(?:\.[0-9]+)? seconds +Output: +freeform shell +?$"; + assert_regex_match(expected_pattern, output); Ok(()) } @@ -259,18 +251,27 @@ async fn shell_output_reserializes_truncated_content() -> Result<()> { serde_json::from_str::(output).is_err(), "expected truncated shell output to be plain text", ); - assert!( - output.starts_with("Exit code: 0\n"), - "expected exit code prefix, got {output:?}", - ); - assert!( - output.lines().any(|line| line == "Total output lines: 400"), - "expected total output lines marker, got {output:?}", - ); - assert!( - output.contains("[... omitted"), - "expected truncated marker, got {output:?}", - ); + let truncated_pattern = r#"(?s)^Exit code: 0 +Wall time: [0-9]+(?:\.[0-9]+)? seconds +Total output lines: 400 +Output: +Total output lines: 400 + +1 +2 +3 +4 +5 +6 +.*\[\.{3} omitted \d+ of 400 lines \.{3}\] + +.*\n396 +397 +398 +399 +400 +$"#; + assert_regex_match(truncated_pattern, output); Ok(()) } diff --git a/codex-rs/core/tests/suite/tool_harness.rs b/codex-rs/core/tests/suite/tool_harness.rs index 74cce5cf..14e0e1c8 100644 --- a/codex-rs/core/tests/suite/tool_harness.rs +++ b/codex-rs/core/tests/suite/tool_harness.rs @@ -9,6 +9,7 @@ use codex_core::protocol::Op; use codex_core::protocol::SandboxPolicy; use codex_protocol::config_types::ReasoningSummary; use codex_protocol::plan_tool::StepStatus; +use core_test_support::assert_regex_match; use core_test_support::responses; use core_test_support::responses::ev_apply_patch_function_call; use core_test_support::responses::ev_assistant_message; @@ -116,10 +117,7 @@ async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()> let exec_output: Value = serde_json::from_str(output_text)?; assert_eq!(exec_output["metadata"]["exit_code"], 0); let stdout = exec_output["output"].as_str().expect("stdout field"); - assert!( - stdout.contains("tool harness"), - "expected stdout to contain command output, got {stdout:?}" - ); + assert_regex_match(r"(?s)^tool harness\n?$", stdout); Ok(()) } diff --git a/codex-rs/core/tests/suite/tools.rs b/codex-rs/core/tests/suite/tools.rs index a2cae76c..e57cd0a5 100644 --- a/codex-rs/core/tests/suite/tools.rs +++ b/codex-rs/core/tests/suite/tools.rs @@ -9,6 +9,7 @@ use codex_core::protocol::InputItem; use codex_core::protocol::Op; use codex_core::protocol::SandboxPolicy; use codex_protocol::config_types::ReasoningSummary; +use core_test_support::assert_regex_match; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_custom_tool_call; @@ -21,6 +22,7 @@ use core_test_support::skip_if_no_network; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use core_test_support::wait_for_event; +use regex_lite::Regex; use serde_json::Value; use serde_json::json; use wiremock::Request; @@ -254,10 +256,8 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> { "expected exit code 0 after rerunning without escalation", ); let stdout = output_json["output"].as_str().unwrap_or_default(); - assert!( - stdout.contains("shell ok"), - "expected stdout to include command output, got {stdout:?}" - ); + let stdout_pattern = r"(?s)^shell ok\n?$"; + assert_regex_match(stdout_pattern, stdout); Ok(()) } @@ -437,15 +437,15 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> { ); let stdout = output_json["output"].as_str().unwrap_or_default(); - assert!( - stdout.contains("command timed out after "), - "expected timeout prefix, got {stdout:?}" - ); - let third_line = stdout.lines().nth(2).unwrap_or_default(); - let duration_ms = third_line - .strip_prefix("command timed out after ") - .and_then(|line| line.strip_suffix(" milliseconds")) - .and_then(|value| value.parse::().ok()) + let timeout_pattern = r"(?s)^Total output lines: \d+ + +command timed out after (?P\d+) milliseconds +line +.*$"; + let captures = assert_regex_match(timeout_pattern, stdout); + let duration_ms = captures + .name("ms") + .and_then(|m| m.as_str().parse::().ok()) .unwrap_or_default(); assert!( duration_ms >= timeout_ms, @@ -453,14 +453,8 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> { ); } else { // Fallback: accept the signal classification path to deflake the test. - assert!( - output_str.contains("execution error"), - "unexpected non-JSON output: {output_str:?}" - ); - assert!( - output_str.contains("Signal(") || output_str.to_lowercase().contains("signal"), - "expected signal classification in error output, got {output_str:?}" - ); + let signal_pattern = r"(?is)^execution error:.*signal.*$"; + assert_regex_match(signal_pattern, output_str); } Ok(()) @@ -518,30 +512,25 @@ async fn shell_sandbox_denied_truncates_error_output() -> Result<()> { .and_then(Value::as_str) .expect("denied output string"); - assert!( - output.contains("failed in sandbox: "), - "expected sandbox error prefix, got {output:?}" - ); - assert!( - output.contains("[... omitted"), - "expected truncated marker, got {output:?}" - ); - assert!( - output.contains(long_line), - "expected truncated stderr sample, got {output:?}" - ); - // Linux distributions may surface sandbox write failures as different errno messages - // depending on the underlying mechanism (e.g., EPERM, EACCES, or EROFS). Accept a - // small set of common variants to keep this cross-platform. - let denial_markers = [ - "Operation not permitted", // EPERM - "Permission denied", // EACCES - "Read-only file system", // EROFS - ]; - assert!( - denial_markers.iter().any(|m| output.contains(m)), - "expected sandbox denial message, got {output:?}" - ); + let sandbox_pattern = r#"(?s)^Exit code: -?\d+ +Wall time: [0-9]+(?:\.[0-9]+)? seconds +Total output lines: \d+ +Output: +Total output lines: \d+ + +failed in sandbox: .*?(?:Operation not permitted|Permission denied|Read-only file system).*? +\[\.{3} omitted \d+ of \d+ lines \.{3}\] +.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.* +\n?$"#; + let sandbox_regex = Regex::new(sandbox_pattern)?; + if !sandbox_regex.is_match(output) { + let fallback_pattern = r#"(?s)^Total output lines: \d+ + +failed in sandbox: this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz +.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.* +.*(?:Operation not permitted|Permission denied|Read-only file system).*$"#; + assert_regex_match(fallback_pattern, output); + } Ok(()) } @@ -604,10 +593,23 @@ async fn shell_spawn_failure_truncates_exec_error() -> Result<()> { .and_then(Value::as_str) .expect("spawn failure output string"); - assert!( - output.contains("execution error:"), - "expected execution error prefix, got {output:?}" - ); + let spawn_error_pattern = r#"(?s)^Exit code: -?\d+ +Wall time: [0-9]+(?:\.[0-9]+)? seconds +Output: +execution error: .*$"#; + let spawn_truncated_pattern = r#"(?s)^Exit code: -?\d+ +Wall time: [0-9]+(?:\.[0-9]+)? seconds +Total output lines: \d+ +Output: +Total output lines: \d+ + +execution error: .*$"#; + let spawn_error_regex = Regex::new(spawn_error_pattern)?; + let spawn_truncated_regex = Regex::new(spawn_truncated_pattern)?; + if !spawn_error_regex.is_match(output) && !spawn_truncated_regex.is_match(output) { + let fallback_pattern = r"(?s)^execution error: .*$"; + assert_regex_match(fallback_pattern, output); + } assert!(output.len() <= 10 * 1024); Ok(())