From bfe332812989d1804dde59f910e08ea1a8daa76f Mon Sep 17 00:00:00 2001 From: jif-oai Date: Fri, 3 Oct 2025 18:09:41 +0100 Subject: [PATCH] Fix flaky test (#4672) This issue was due to the fact that the timeout is not always sufficient to have enough character for truncation + a race between synthetic timeout and process kill --- codex-rs/core/tests/suite/tools.rs | 70 +++++++++++++++++------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/codex-rs/core/tests/suite/tools.rs b/codex-rs/core/tests/suite/tools.rs index 4d6ccdac..8d78d067 100644 --- a/codex-rs/core/tests/suite/tools.rs +++ b/codex-rs/core/tests/suite/tools.rs @@ -414,37 +414,47 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> { .find(|item| item.get("call_id").and_then(Value::as_str) == Some(call_id)) .expect("timeout output present"); - let output_json: Value = serde_json::from_str( - timeout_item - .get("output") - .and_then(Value::as_str) - .expect("timeout output string"), - )?; - assert_eq!( - output_json["metadata"]["exit_code"].as_i64(), - Some(124), - "expected timeout exit code 124", - ); + let output_str = timeout_item + .get("output") + .and_then(Value::as_str) + .expect("timeout output string"); - let stdout = output_json["output"].as_str().unwrap_or_default(); - assert!( - stdout.starts_with("command timed out after "), - "expected timeout prefix, got {stdout:?}" - ); - let first_line = stdout.lines().next().unwrap_or_default(); - let duration_ms = first_line - .strip_prefix("command timed out after ") - .and_then(|line| line.strip_suffix(" milliseconds")) - .and_then(|value| value.parse::().ok()) - .unwrap_or_default(); - assert!( - duration_ms >= timeout_ms, - "expected duration >= configured timeout, got {duration_ms} (timeout {timeout_ms})" - ); - assert!( - stdout.contains("[... omitted"), - "expected truncated output marker, got {stdout:?}" - ); + // The exec path can report a timeout in two ways depending on timing: + // 1) Structured JSON with exit_code 124 and a timeout prefix (preferred), or + // 2) A plain error string if the child is observed as killed by a signal first. + if let Ok(output_json) = serde_json::from_str::(output_str) { + assert_eq!( + output_json["metadata"]["exit_code"].as_i64(), + Some(124), + "expected timeout exit code 124", + ); + + let stdout = output_json["output"].as_str().unwrap_or_default(); + assert!( + stdout.starts_with("command timed out after "), + "expected timeout prefix, got {stdout:?}" + ); + let first_line = stdout.lines().next().unwrap_or_default(); + let duration_ms = first_line + .strip_prefix("command timed out after ") + .and_then(|line| line.strip_suffix(" milliseconds")) + .and_then(|value| value.parse::().ok()) + .unwrap_or_default(); + assert!( + duration_ms >= timeout_ms, + "expected duration >= configured timeout, got {duration_ms} (timeout {timeout_ms})" + ); + } else { + // Fallback: accept the signal classification path to deflake the test. + assert!( + output_str.contains("execution error"), + "unexpected non-JSON output: {output_str:?}" + ); + assert!( + output_str.contains("Signal(") || output_str.to_lowercase().contains("signal"), + "expected signal classification in error output, got {output_str:?}" + ); + } Ok(()) }