Make output assertions more explicit (#4784)
Match using precise regexes.
This commit is contained in:
1
codex-rs/Cargo.lock
generated
1
codex-rs/Cargo.lock
generated
@@ -1575,6 +1575,7 @@ dependencies = [
|
|||||||
"anyhow",
|
"anyhow",
|
||||||
"assert_cmd",
|
"assert_cmd",
|
||||||
"codex-core",
|
"codex-core",
|
||||||
|
"regex-lite",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ path = "lib.rs"
|
|||||||
anyhow = { workspace = true }
|
anyhow = { workspace = true }
|
||||||
assert_cmd = { workspace = true }
|
assert_cmd = { workspace = true }
|
||||||
codex-core = { workspace = true }
|
codex-core = { workspace = true }
|
||||||
|
regex-lite = { workspace = true }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
tempfile = { workspace = true }
|
tempfile = { workspace = true }
|
||||||
tokio = { workspace = true, features = ["time"] }
|
tokio = { workspace = true, features = ["time"] }
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ use codex_core::CodexConversation;
|
|||||||
use codex_core::config::Config;
|
use codex_core::config::Config;
|
||||||
use codex_core::config::ConfigOverrides;
|
use codex_core::config::ConfigOverrides;
|
||||||
use codex_core::config::ConfigToml;
|
use codex_core::config::ConfigToml;
|
||||||
|
use regex_lite::Regex;
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
use assert_cmd::cargo::cargo_bin;
|
use assert_cmd::cargo::cargo_bin;
|
||||||
@@ -14,6 +15,16 @@ pub mod responses;
|
|||||||
pub mod test_codex;
|
pub mod test_codex;
|
||||||
pub mod test_codex_exec;
|
pub mod test_codex_exec;
|
||||||
|
|
||||||
|
#[track_caller]
|
||||||
|
pub fn assert_regex_match<'s>(pattern: &str, actual: &'s str) -> regex_lite::Captures<'s> {
|
||||||
|
let regex = Regex::new(pattern).unwrap_or_else(|err| {
|
||||||
|
panic!("failed to compile regex {pattern:?}: {err}");
|
||||||
|
});
|
||||||
|
regex
|
||||||
|
.captures(actual)
|
||||||
|
.unwrap_or_else(|| panic!("regex {pattern:?} did not match {actual:?}"))
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns a default `Config` whose on-disk state is confined to the provided
|
/// Returns a default `Config` whose on-disk state is confined to the provided
|
||||||
/// temporary directory. Using a per-test directory keeps tests hermetic and
|
/// temporary directory. Using a per-test directory keeps tests hermetic and
|
||||||
/// avoids clobbering a developer’s real `~/.codex`.
|
/// avoids clobbering a developer’s real `~/.codex`.
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ use codex_core::protocol::InputItem;
|
|||||||
use codex_core::protocol::Op;
|
use codex_core::protocol::Op;
|
||||||
use codex_core::protocol::SandboxPolicy;
|
use codex_core::protocol::SandboxPolicy;
|
||||||
use codex_protocol::config_types::ReasoningSummary;
|
use codex_protocol::config_types::ReasoningSummary;
|
||||||
|
use core_test_support::assert_regex_match;
|
||||||
use core_test_support::responses::ev_assistant_message;
|
use core_test_support::responses::ev_assistant_message;
|
||||||
use core_test_support::responses::ev_completed;
|
use core_test_support::responses::ev_completed;
|
||||||
use core_test_support::responses::ev_function_call;
|
use core_test_support::responses::ev_function_call;
|
||||||
@@ -131,10 +132,7 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
|
|||||||
.get("output")
|
.get("output")
|
||||||
.and_then(Value::as_str)
|
.and_then(Value::as_str)
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
assert!(
|
assert_regex_match(r"(?s)^shell json\n?$", stdout);
|
||||||
stdout.contains("shell json"),
|
|
||||||
"expected stdout to include command output, got {stdout:?}"
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -190,18 +188,12 @@ async fn shell_output_is_structured_with_freeform_apply_patch() -> Result<()> {
|
|||||||
serde_json::from_str::<Value>(output).is_err(),
|
serde_json::from_str::<Value>(output).is_err(),
|
||||||
"expected structured shell output to be plain text",
|
"expected structured shell output to be plain text",
|
||||||
);
|
);
|
||||||
assert!(
|
let expected_pattern = r"(?s)^Exit code: 0
|
||||||
output.starts_with("Exit code: 0\n"),
|
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||||
"expected exit code prefix, got {output:?}",
|
Output:
|
||||||
);
|
freeform shell
|
||||||
assert!(
|
?$";
|
||||||
output.contains("\nOutput:\n"),
|
assert_regex_match(expected_pattern, output);
|
||||||
"expected Output section, got {output:?}"
|
|
||||||
);
|
|
||||||
assert!(
|
|
||||||
output.contains("freeform shell"),
|
|
||||||
"expected stdout content, got {output:?}"
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -259,18 +251,27 @@ async fn shell_output_reserializes_truncated_content() -> Result<()> {
|
|||||||
serde_json::from_str::<Value>(output).is_err(),
|
serde_json::from_str::<Value>(output).is_err(),
|
||||||
"expected truncated shell output to be plain text",
|
"expected truncated shell output to be plain text",
|
||||||
);
|
);
|
||||||
assert!(
|
let truncated_pattern = r#"(?s)^Exit code: 0
|
||||||
output.starts_with("Exit code: 0\n"),
|
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||||
"expected exit code prefix, got {output:?}",
|
Total output lines: 400
|
||||||
);
|
Output:
|
||||||
assert!(
|
Total output lines: 400
|
||||||
output.lines().any(|line| line == "Total output lines: 400"),
|
|
||||||
"expected total output lines marker, got {output:?}",
|
1
|
||||||
);
|
2
|
||||||
assert!(
|
3
|
||||||
output.contains("[... omitted"),
|
4
|
||||||
"expected truncated marker, got {output:?}",
|
5
|
||||||
);
|
6
|
||||||
|
.*\[\.{3} omitted \d+ of 400 lines \.{3}\]
|
||||||
|
|
||||||
|
.*\n396
|
||||||
|
397
|
||||||
|
398
|
||||||
|
399
|
||||||
|
400
|
||||||
|
$"#;
|
||||||
|
assert_regex_match(truncated_pattern, output);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ use codex_core::protocol::Op;
|
|||||||
use codex_core::protocol::SandboxPolicy;
|
use codex_core::protocol::SandboxPolicy;
|
||||||
use codex_protocol::config_types::ReasoningSummary;
|
use codex_protocol::config_types::ReasoningSummary;
|
||||||
use codex_protocol::plan_tool::StepStatus;
|
use codex_protocol::plan_tool::StepStatus;
|
||||||
|
use core_test_support::assert_regex_match;
|
||||||
use core_test_support::responses;
|
use core_test_support::responses;
|
||||||
use core_test_support::responses::ev_apply_patch_function_call;
|
use core_test_support::responses::ev_apply_patch_function_call;
|
||||||
use core_test_support::responses::ev_assistant_message;
|
use core_test_support::responses::ev_assistant_message;
|
||||||
@@ -116,10 +117,7 @@ async fn shell_tool_executes_command_and_streams_output() -> anyhow::Result<()>
|
|||||||
let exec_output: Value = serde_json::from_str(output_text)?;
|
let exec_output: Value = serde_json::from_str(output_text)?;
|
||||||
assert_eq!(exec_output["metadata"]["exit_code"], 0);
|
assert_eq!(exec_output["metadata"]["exit_code"], 0);
|
||||||
let stdout = exec_output["output"].as_str().expect("stdout field");
|
let stdout = exec_output["output"].as_str().expect("stdout field");
|
||||||
assert!(
|
assert_regex_match(r"(?s)^tool harness\n?$", stdout);
|
||||||
stdout.contains("tool harness"),
|
|
||||||
"expected stdout to contain command output, got {stdout:?}"
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ use codex_core::protocol::InputItem;
|
|||||||
use codex_core::protocol::Op;
|
use codex_core::protocol::Op;
|
||||||
use codex_core::protocol::SandboxPolicy;
|
use codex_core::protocol::SandboxPolicy;
|
||||||
use codex_protocol::config_types::ReasoningSummary;
|
use codex_protocol::config_types::ReasoningSummary;
|
||||||
|
use core_test_support::assert_regex_match;
|
||||||
use core_test_support::responses::ev_assistant_message;
|
use core_test_support::responses::ev_assistant_message;
|
||||||
use core_test_support::responses::ev_completed;
|
use core_test_support::responses::ev_completed;
|
||||||
use core_test_support::responses::ev_custom_tool_call;
|
use core_test_support::responses::ev_custom_tool_call;
|
||||||
@@ -21,6 +22,7 @@ use core_test_support::skip_if_no_network;
|
|||||||
use core_test_support::test_codex::TestCodex;
|
use core_test_support::test_codex::TestCodex;
|
||||||
use core_test_support::test_codex::test_codex;
|
use core_test_support::test_codex::test_codex;
|
||||||
use core_test_support::wait_for_event;
|
use core_test_support::wait_for_event;
|
||||||
|
use regex_lite::Regex;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use wiremock::Request;
|
use wiremock::Request;
|
||||||
@@ -254,10 +256,8 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> {
|
|||||||
"expected exit code 0 after rerunning without escalation",
|
"expected exit code 0 after rerunning without escalation",
|
||||||
);
|
);
|
||||||
let stdout = output_json["output"].as_str().unwrap_or_default();
|
let stdout = output_json["output"].as_str().unwrap_or_default();
|
||||||
assert!(
|
let stdout_pattern = r"(?s)^shell ok\n?$";
|
||||||
stdout.contains("shell ok"),
|
assert_regex_match(stdout_pattern, stdout);
|
||||||
"expected stdout to include command output, got {stdout:?}"
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -437,15 +437,15 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
|
|||||||
);
|
);
|
||||||
|
|
||||||
let stdout = output_json["output"].as_str().unwrap_or_default();
|
let stdout = output_json["output"].as_str().unwrap_or_default();
|
||||||
assert!(
|
let timeout_pattern = r"(?s)^Total output lines: \d+
|
||||||
stdout.contains("command timed out after "),
|
|
||||||
"expected timeout prefix, got {stdout:?}"
|
command timed out after (?P<ms>\d+) milliseconds
|
||||||
);
|
line
|
||||||
let third_line = stdout.lines().nth(2).unwrap_or_default();
|
.*$";
|
||||||
let duration_ms = third_line
|
let captures = assert_regex_match(timeout_pattern, stdout);
|
||||||
.strip_prefix("command timed out after ")
|
let duration_ms = captures
|
||||||
.and_then(|line| line.strip_suffix(" milliseconds"))
|
.name("ms")
|
||||||
.and_then(|value| value.parse::<u64>().ok())
|
.and_then(|m| m.as_str().parse::<u64>().ok())
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
assert!(
|
assert!(
|
||||||
duration_ms >= timeout_ms,
|
duration_ms >= timeout_ms,
|
||||||
@@ -453,14 +453,8 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
|
|||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
// Fallback: accept the signal classification path to deflake the test.
|
// Fallback: accept the signal classification path to deflake the test.
|
||||||
assert!(
|
let signal_pattern = r"(?is)^execution error:.*signal.*$";
|
||||||
output_str.contains("execution error"),
|
assert_regex_match(signal_pattern, output_str);
|
||||||
"unexpected non-JSON output: {output_str:?}"
|
|
||||||
);
|
|
||||||
assert!(
|
|
||||||
output_str.contains("Signal(") || output_str.to_lowercase().contains("signal"),
|
|
||||||
"expected signal classification in error output, got {output_str:?}"
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -518,30 +512,25 @@ async fn shell_sandbox_denied_truncates_error_output() -> Result<()> {
|
|||||||
.and_then(Value::as_str)
|
.and_then(Value::as_str)
|
||||||
.expect("denied output string");
|
.expect("denied output string");
|
||||||
|
|
||||||
assert!(
|
let sandbox_pattern = r#"(?s)^Exit code: -?\d+
|
||||||
output.contains("failed in sandbox: "),
|
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||||
"expected sandbox error prefix, got {output:?}"
|
Total output lines: \d+
|
||||||
);
|
Output:
|
||||||
assert!(
|
Total output lines: \d+
|
||||||
output.contains("[... omitted"),
|
|
||||||
"expected truncated marker, got {output:?}"
|
failed in sandbox: .*?(?:Operation not permitted|Permission denied|Read-only file system).*?
|
||||||
);
|
\[\.{3} omitted \d+ of \d+ lines \.{3}\]
|
||||||
assert!(
|
.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
|
||||||
output.contains(long_line),
|
\n?$"#;
|
||||||
"expected truncated stderr sample, got {output:?}"
|
let sandbox_regex = Regex::new(sandbox_pattern)?;
|
||||||
);
|
if !sandbox_regex.is_match(output) {
|
||||||
// Linux distributions may surface sandbox write failures as different errno messages
|
let fallback_pattern = r#"(?s)^Total output lines: \d+
|
||||||
// depending on the underlying mechanism (e.g., EPERM, EACCES, or EROFS). Accept a
|
|
||||||
// small set of common variants to keep this cross-platform.
|
failed in sandbox: this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz
|
||||||
let denial_markers = [
|
.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
|
||||||
"Operation not permitted", // EPERM
|
.*(?:Operation not permitted|Permission denied|Read-only file system).*$"#;
|
||||||
"Permission denied", // EACCES
|
assert_regex_match(fallback_pattern, output);
|
||||||
"Read-only file system", // EROFS
|
}
|
||||||
];
|
|
||||||
assert!(
|
|
||||||
denial_markers.iter().any(|m| output.contains(m)),
|
|
||||||
"expected sandbox denial message, got {output:?}"
|
|
||||||
);
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@@ -604,10 +593,23 @@ async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
|
|||||||
.and_then(Value::as_str)
|
.and_then(Value::as_str)
|
||||||
.expect("spawn failure output string");
|
.expect("spawn failure output string");
|
||||||
|
|
||||||
assert!(
|
let spawn_error_pattern = r#"(?s)^Exit code: -?\d+
|
||||||
output.contains("execution error:"),
|
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||||
"expected execution error prefix, got {output:?}"
|
Output:
|
||||||
);
|
execution error: .*$"#;
|
||||||
|
let spawn_truncated_pattern = r#"(?s)^Exit code: -?\d+
|
||||||
|
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||||
|
Total output lines: \d+
|
||||||
|
Output:
|
||||||
|
Total output lines: \d+
|
||||||
|
|
||||||
|
execution error: .*$"#;
|
||||||
|
let spawn_error_regex = Regex::new(spawn_error_pattern)?;
|
||||||
|
let spawn_truncated_regex = Regex::new(spawn_truncated_pattern)?;
|
||||||
|
if !spawn_error_regex.is_match(output) && !spawn_truncated_regex.is_match(output) {
|
||||||
|
let fallback_pattern = r"(?s)^execution error: .*$";
|
||||||
|
assert_regex_match(fallback_pattern, output);
|
||||||
|
}
|
||||||
assert!(output.len() <= 10 * 1024);
|
assert!(output.len() <= 10 * 1024);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
Reference in New Issue
Block a user