diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 84c52d91..1699867f 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -2520,13 +2520,19 @@ mod tests { let out = format_exec_output_str(&exec); + // Strip truncation header if present for subsequent assertions + let body = out + .strip_prefix("Total output lines: ") + .and_then(|rest| rest.split_once("\n\n").map(|x| x.1)) + .unwrap_or(out.as_str()); + // Expect elision marker with correct counts let omitted = 400 - MODEL_FORMAT_MAX_LINES; // 144 let marker = format!("\n[... omitted {omitted} of 400 lines ...]\n\n"); assert!(out.contains(&marker), "missing marker: {out}"); // Validate head and tail - let parts: Vec<&str> = out.split(&marker).collect(); + let parts: Vec<&str> = body.split(&marker).collect(); assert_eq!(parts.len(), 2, "expected one marker split"); let head = parts[0]; let tail = parts[1]; @@ -2562,14 +2568,19 @@ mod tests { }; let out = format_exec_output_str(&exec); - assert!(out.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget"); + // Keep strict budget on the truncated body (excluding header) + let body = out + .strip_prefix("Total output lines: ") + .and_then(|rest| rest.split_once("\n\n").map(|x| x.1)) + .unwrap_or(out.as_str()); + assert!(body.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget"); assert!(out.contains("omitted"), "should contain elision marker"); // Ensure head and tail are drawn from the original - assert!(full.starts_with(out.chars().take(8).collect::().as_str())); + assert!(full.starts_with(body.chars().take(8).collect::().as_str())); assert!( full.ends_with( - out.chars() + body.chars() .rev() .take(8) .collect::() diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index d0c922d1..4bd08c66 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -162,9 +162,9 @@ pub(crate) async fn handle_container_exec_with_params( ), Err(ExecError::Codex(err)) => { let message = format!("execution error: {err:?}"); - Err(FunctionCallError::RespondToModel( - truncate_formatted_exec_output(&message), - )) + Err(FunctionCallError::RespondToModel(format_exec_output( + &message, + ))) } } } @@ -217,20 +217,34 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { "command timed out after {} milliseconds\n{content}", exec_output.duration.as_millis() ); - return truncate_formatted_exec_output(&prefixed); + return format_exec_output(&prefixed); } - truncate_formatted_exec_output(content) + format_exec_output(content) } -fn truncate_formatted_exec_output(content: &str) -> String { +fn truncate_function_error(err: FunctionCallError) -> FunctionCallError { + match err { + FunctionCallError::RespondToModel(msg) => { + FunctionCallError::RespondToModel(format_exec_output(&msg)) + } + FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)), + other => other, + } +} + +fn format_exec_output(content: &str) -> String { // Head+tail truncation for the model: show the beginning and end with an elision. // Clients still receive full streams; only this formatted summary is capped. let total_lines = content.lines().count(); if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES { return content.to_string(); } + let output = truncate_formatted_exec_output(content, total_lines); + format!("Total output lines: {total_lines}\n\n{output}") +} +fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String { let segments: Vec<&str> = content.split_inclusive('\n').collect(); let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len()); let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take)); @@ -285,32 +299,49 @@ fn truncate_formatted_exec_output(content: &str) -> String { result } -fn truncate_function_error(err: FunctionCallError) -> FunctionCallError { - match err { - FunctionCallError::RespondToModel(msg) => { - FunctionCallError::RespondToModel(truncate_formatted_exec_output(&msg)) - } - FunctionCallError::Fatal(msg) => { - FunctionCallError::Fatal(truncate_formatted_exec_output(&msg)) - } - other => other, - } -} - #[cfg(test)] mod tests { use super::*; + use regex_lite::Regex; + + fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) { + let pattern = truncated_message_pattern(line, total_lines); + let regex = Regex::new(&pattern).unwrap_or_else(|err| { + panic!("failed to compile regex {pattern}: {err}"); + }); + let captures = regex + .captures(message) + .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}")); + let body = captures + .name("body") + .expect("missing body capture") + .as_str(); + assert!( + body.len() <= MODEL_FORMAT_MAX_BYTES, + "body exceeds byte limit: {} bytes", + body.len() + ); + } + + fn truncated_message_pattern(line: &str, total_lines: usize) -> String { + let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines); + let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take)); + let omitted = total_lines.saturating_sub(head_take + tail_take); + let escaped_line = regex_lite::escape(line); + format!( + r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$", + ) + } #[test] fn truncate_formatted_exec_output_truncates_large_error() { let line = "very long execution error line that should trigger truncation\n"; let large_error = line.repeat(2_500); // way beyond both byte and line limits - let truncated = truncate_formatted_exec_output(&large_error); + let truncated = format_exec_output(&large_error); - assert!(truncated.len() <= MODEL_FORMAT_MAX_BYTES); - assert!(truncated.starts_with(line)); - assert!(truncated.contains("[... omitted")); + let total_lines = large_error.lines().count(); + assert_truncated_message_matches(&truncated, line, total_lines); assert_ne!(truncated, large_error); } @@ -318,13 +349,12 @@ mod tests { fn truncate_function_error_trims_respond_to_model() { let line = "respond-to-model error that should be truncated\n"; let huge = line.repeat(3_000); + let total_lines = huge.lines().count(); let err = truncate_function_error(FunctionCallError::RespondToModel(huge)); match err { FunctionCallError::RespondToModel(message) => { - assert!(message.len() <= MODEL_FORMAT_MAX_BYTES); - assert!(message.contains("[... omitted")); - assert!(message.starts_with(line)); + assert_truncated_message_matches(&message, line, total_lines); } other => panic!("unexpected error variant: {other:?}"), } @@ -334,13 +364,12 @@ mod tests { fn truncate_function_error_trims_fatal() { let line = "fatal error output that should be truncated\n"; let huge = line.repeat(3_000); + let total_lines = huge.lines().count(); let err = truncate_function_error(FunctionCallError::Fatal(huge)); match err { FunctionCallError::Fatal(message) => { - assert!(message.len() <= MODEL_FORMAT_MAX_BYTES); - assert!(message.contains("[... omitted")); - assert!(message.starts_with(line)); + assert_truncated_message_matches(&message, line, total_lines); } other => panic!("unexpected error variant: {other:?}"), } diff --git a/codex-rs/core/tests/suite/tools.rs b/codex-rs/core/tests/suite/tools.rs index b3c067ec..99290852 100644 --- a/codex-rs/core/tests/suite/tools.rs +++ b/codex-rs/core/tests/suite/tools.rs @@ -438,11 +438,11 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> { let stdout = output_json["output"].as_str().unwrap_or_default(); assert!( - stdout.starts_with("command timed out after "), + stdout.contains("command timed out after "), "expected timeout prefix, got {stdout:?}" ); - let first_line = stdout.lines().next().unwrap_or_default(); - let duration_ms = first_line + let third_line = stdout.lines().nth(2).unwrap_or_default(); + let duration_ms = third_line .strip_prefix("command timed out after ") .and_then(|line| line.strip_suffix(" milliseconds")) .and_then(|value| value.parse::().ok()) @@ -519,7 +519,7 @@ async fn shell_sandbox_denied_truncates_error_output() -> Result<()> { .expect("denied output string"); assert!( - output.starts_with("failed in sandbox: "), + output.contains("failed in sandbox: "), "expected sandbox error prefix, got {output:?}" ); assert!( @@ -605,7 +605,7 @@ async fn shell_spawn_failure_truncates_exec_error() -> Result<()> { .expect("spawn failure output string"); assert!( - output.starts_with("execution error:"), + output.contains("execution error:"), "expected execution error prefix, got {output:?}" ); assert!(output.len() <= 10 * 1024);