Make output assertions more explicit (#4784)
Match using precise regexes.
This commit is contained in:
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -1575,6 +1575,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
"codex-core",
|
||||
"regex-lite",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
|
||||
@@ -10,6 +10,7 @@ path = "lib.rs"
|
||||
anyhow = { workspace = true }
|
||||
assert_cmd = { workspace = true }
|
||||
codex-core = { workspace = true }
|
||||
regex-lite = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
tokio = { workspace = true, features = ["time"] }
|
||||
|
||||
@@ -6,6 +6,7 @@ use codex_core::CodexConversation;
|
||||
use codex_core::config::Config;
|
||||
use codex_core::config::ConfigOverrides;
|
||||
use codex_core::config::ConfigToml;
|
||||
use regex_lite::Regex;
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
use assert_cmd::cargo::cargo_bin;
|
||||
@@ -14,6 +15,16 @@ pub mod responses;
|
||||
pub mod test_codex;
|
||||
pub mod test_codex_exec;
|
||||
|
||||
#[track_caller]
|
||||
pub fn assert_regex_match<'s>(pattern: &str, actual: &'s str) -> regex_lite::Captures<'s> {
|
||||
let regex = Regex::new(pattern).unwrap_or_else(|err| {
|
||||
panic!("failed to compile regex {pattern:?}: {err}");
|
||||
});
|
||||
regex
|
||||
.captures(actual)
|
||||
.unwrap_or_else(|| panic!("regex {pattern:?} did not match {actual:?}"))
|
||||
}
|
||||
|
||||
/// Returns a default `Config` whose on-disk state is confined to the provided
|
||||
/// temporary directory. Using a per-test directory keeps tests hermetic and
|
||||
/// avoids clobbering a developer’s real `~/.codex`.
|
||||
|
||||
@@ -8,6 +8,7 @@ use codex_core::protocol::InputItem;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
@@ -131,10 +132,7 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
||||
.get("output")
|
||||
.and_then(Value::as_str)
|
||||
.unwrap_or_default();
|
||||
assert!(
|
||||
stdout.contains("shell json"),
|
||||
"expected stdout to include command output, got {stdout:?}"
|
||||
);
|
||||
assert_regex_match(r"(?s)^shell json\n?$", stdout);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -190,18 +188,12 @@ async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
|
||||
serde_json::from_str::<Value>(output).is_err(),
|
||||
"expected structured shell output to be plain text",
|
||||
);
|
||||
assert!(
|
||||
output.starts_with("Exit code: 0\n"),
|
||||
"expected exit code prefix, got {output:?}",
|
||||
);
|
||||
assert!(
|
||||
output.contains("\nOutput:\n"),
|
||||
"expected Output section, got {output:?}"
|
||||
);
|
||||
assert!(
|
||||
output.contains("freeform shell"),
|
||||
"expected stdout content, got {output:?}"
|
||||
);
|
||||
let expected_pattern = r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
freeform shell
|
||||
?$";
|
||||
assert_regex_match(expected_pattern, output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -259,18 +251,27 @@ async fn shell_output_reserializes_truncated_content() -> Result<()> {
|
||||
serde_json::from_str::<Value>(output).is_err(),
|
||||
"expected truncated shell output to be plain text",
|
||||
);
|
||||
assert!(
|
||||
output.starts_with("Exit code: 0\n"),
|
||||
"expected exit code prefix, got {output:?}",
|
||||
);
|
||||
assert!(
|
||||
output.lines().any(|line| line == "Total output lines: 400"),
|
||||
"expected total output lines marker, got {output:?}",
|
||||
);
|
||||
assert!(
|
||||
output.contains("[... omitted"),
|
||||
"expected truncated marker, got {output:?}",
|
||||
);
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Total output lines: 400
|
||||
Output:
|
||||
Total output lines: 400
|
||||
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
.*\[\.{3} omitted \d+ of 400 lines \.{3}\]
|
||||
|
||||
.*\n396
|
||||
397
|
||||
398
|
||||
399
|
||||
400
|
||||
$"#;
|
||||
assert_regex_match(truncated_pattern, output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::plan_tool::StepStatus;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses;
|
||||
use core_test_support::responses::ev_apply_patch_function_call;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
@@ -116,10 +117,7 @@ async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()>
|
||||
let exec_output: Value = serde_json::from_str(output_text)?;
|
||||
assert_eq!(exec_output["metadata"]["exit_code"], 0);
|
||||
let stdout = exec_output["output"].as_str().expect("stdout field");
|
||||
assert!(
|
||||
stdout.contains("tool harness"),
|
||||
"expected stdout to contain command output, got {stdout:?}"
|
||||
);
|
||||
assert_regex_match(r"(?s)^tool harness\n?$", stdout);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ use codex_core::protocol::InputItem;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_custom_tool_call;
|
||||
@@ -21,6 +22,7 @@ use core_test_support::skip_if_no_network;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use regex_lite::Regex;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use wiremock::Request;
|
||||
@@ -254,10 +256,8 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
|
||||
"expected exit code 0 after rerunning without escalation",
|
||||
);
|
||||
let stdout = output_json["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
stdout.contains("shell ok"),
|
||||
"expected stdout to include command output, got {stdout:?}"
|
||||
);
|
||||
let stdout_pattern = r"(?s)^shell ok\n?$";
|
||||
assert_regex_match(stdout_pattern, stdout);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -437,15 +437,15 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
|
||||
);
|
||||
|
||||
let stdout = output_json["output"].as_str().unwrap_or_default();
|
||||
assert!(
|
||||
stdout.contains("command timed out after "),
|
||||
"expected timeout prefix, got {stdout:?}"
|
||||
);
|
||||
let third_line = stdout.lines().nth(2).unwrap_or_default();
|
||||
let duration_ms = third_line
|
||||
.strip_prefix("command timed out after ")
|
||||
.and_then(|line| line.strip_suffix(" milliseconds"))
|
||||
.and_then(|value| value.parse::<u64>().ok())
|
||||
let timeout_pattern = r"(?s)^Total output lines: \d+
|
||||
|
||||
command timed out after (?P<ms>\d+) milliseconds
|
||||
line
|
||||
.*$";
|
||||
let captures = assert_regex_match(timeout_pattern, stdout);
|
||||
let duration_ms = captures
|
||||
.name("ms")
|
||||
.and_then(|m| m.as_str().parse::<u64>().ok())
|
||||
.unwrap_or_default();
|
||||
assert!(
|
||||
duration_ms >= timeout_ms,
|
||||
@@ -453,14 +453,8 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
|
||||
);
|
||||
} else {
|
||||
// Fallback: accept the signal classification path to deflake the test.
|
||||
assert!(
|
||||
output_str.contains("execution error"),
|
||||
"unexpected non-JSON output: {output_str:?}"
|
||||
);
|
||||
assert!(
|
||||
output_str.contains("Signal(") || output_str.to_lowercase().contains("signal"),
|
||||
"expected signal classification in error output, got {output_str:?}"
|
||||
);
|
||||
let signal_pattern = r"(?is)^execution error:.*signal.*$";
|
||||
assert_regex_match(signal_pattern, output_str);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -518,30 +512,25 @@ async fn shell_sandbox_denied_truncates_error_output() -> Result<()> {
|
||||
.and_then(Value::as_str)
|
||||
.expect("denied output string");
|
||||
|
||||
assert!(
|
||||
output.contains("failed in sandbox: "),
|
||||
"expected sandbox error prefix, got {output:?}"
|
||||
);
|
||||
assert!(
|
||||
output.contains("[... omitted"),
|
||||
"expected truncated marker, got {output:?}"
|
||||
);
|
||||
assert!(
|
||||
output.contains(long_line),
|
||||
"expected truncated stderr sample, got {output:?}"
|
||||
);
|
||||
// Linux distributions may surface sandbox write failures as different errno messages
|
||||
// depending on the underlying mechanism (e.g., EPERM, EACCES, or EROFS). Accept a
|
||||
// small set of common variants to keep this cross-platform.
|
||||
let denial_markers = [
|
||||
"Operation not permitted", // EPERM
|
||||
"Permission denied", // EACCES
|
||||
"Read-only file system", // EROFS
|
||||
];
|
||||
assert!(
|
||||
denial_markers.iter().any(|m| output.contains(m)),
|
||||
"expected sandbox denial message, got {output:?}"
|
||||
);
|
||||
let sandbox_pattern = r#"(?s)^Exit code: -?\d+
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Total output lines: \d+
|
||||
Output:
|
||||
Total output lines: \d+
|
||||
|
||||
failed in sandbox: .*?(?:Operation not permitted|Permission denied|Read-only file system).*?
|
||||
\[\.{3} omitted \d+ of \d+ lines \.{3}\]
|
||||
.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
|
||||
\n?$"#;
|
||||
let sandbox_regex = Regex::new(sandbox_pattern)?;
|
||||
if !sandbox_regex.is_match(output) {
|
||||
let fallback_pattern = r#"(?s)^Total output lines: \d+
|
||||
|
||||
failed in sandbox: this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz
|
||||
.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
|
||||
.*(?:Operation not permitted|Permission denied|Read-only file system).*$"#;
|
||||
assert_regex_match(fallback_pattern, output);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -604,10 +593,23 @@ async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
|
||||
.and_then(Value::as_str)
|
||||
.expect("spawn failure output string");
|
||||
|
||||
assert!(
|
||||
output.contains("execution error:"),
|
||||
"expected execution error prefix, got {output:?}"
|
||||
);
|
||||
let spawn_error_pattern = r#"(?s)^Exit code: -?\d+
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
execution error: .*$"#;
|
||||
let spawn_truncated_pattern = r#"(?s)^Exit code: -?\d+
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Total output lines: \d+
|
||||
Output:
|
||||
Total output lines: \d+
|
||||
|
||||
execution error: .*$"#;
|
||||
let spawn_error_regex = Regex::new(spawn_error_pattern)?;
|
||||
let spawn_truncated_regex = Regex::new(spawn_truncated_pattern)?;
|
||||
if !spawn_error_regex.is_match(output) && !spawn_truncated_regex.is_match(output) {
|
||||
let fallback_pattern = r"(?s)^execution error: .*$";
|
||||
assert_regex_match(fallback_pattern, output);
|
||||
}
|
||||
assert!(output.len() <= 10 * 1024);
|
||||
|
||||
Ok(())
|
||||
|
||||
Reference in New Issue
Block a user