Add truncation hint on truncated exec output. (#4740)

When truncating output, add a hint of the total number of lines
2025-10-04 20:29:07 -07:00
parent 356ea6ea34
commit cc2f4aafd7
3 changed files with 77 additions and 37 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -2520,13 +2520,19 @@ mod tests {

        let out = format_exec_output_str(&exec);

+        // Strip truncation header if present for subsequent assertions
+        let body = out
+            .strip_prefix("Total output lines: ")
+            .and_then(|rest| rest.split_once("\n\n").map(|x| x.1))
+            .unwrap_or(out.as_str());
+
        // Expect elision marker with correct counts
        let omitted = 400 - MODEL_FORMAT_MAX_LINES; // 144
        let marker = format!("\n[... omitted {omitted} of 400 lines ...]\n\n");
        assert!(out.contains(&marker), "missing marker: {out}");

        // Validate head and tail
-        let parts: Vec<&str> = out.split(&marker).collect();
+        let parts: Vec<&str> = body.split(&marker).collect();
        assert_eq!(parts.len(), 2, "expected one marker split");
        let head = parts[0];
        let tail = parts[1];
@@ -2562,14 +2568,19 @@ mod tests {
        };

        let out = format_exec_output_str(&exec);
-        assert!(out.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget");
+        // Keep strict budget on the truncated body (excluding header)
+        let body = out
+            .strip_prefix("Total output lines: ")
+            .and_then(|rest| rest.split_once("\n\n").map(|x| x.1))
+            .unwrap_or(out.as_str());
+        assert!(body.len() <= MODEL_FORMAT_MAX_BYTES, "exceeds byte budget");
        assert!(out.contains("omitted"), "should contain elision marker");

        // Ensure head and tail are drawn from the original
-        assert!(full.starts_with(out.chars().take(8).collect::<String>().as_str()));
+        assert!(full.starts_with(body.chars().take(8).collect::<String>().as_str()));
        assert!(
            full.ends_with(
-                out.chars()
+                body.chars()
                    .rev()
                    .take(8)
                    .collect::<String>()
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -162,9 +162,9 @@ pub(crate) async fn handle_container_exec_with_params(
        ),
        Err(ExecError::Codex(err)) => {
            let message = format!("execution error: {err:?}");
-            Err(FunctionCallError::RespondToModel(
-                truncate_formatted_exec_output(&message),
-            ))
+            Err(FunctionCallError::RespondToModel(format_exec_output(
+                &message,
+            )))
        }
    }
 }
@@ -217,20 +217,34 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
            "command timed out after {} milliseconds\n{content}",
            exec_output.duration.as_millis()
        );
-        return truncate_formatted_exec_output(&prefixed);
+        return format_exec_output(&prefixed);
    }

-    truncate_formatted_exec_output(content)
+    format_exec_output(content)
 }

-fn truncate_formatted_exec_output(content: &str) -> String {
+fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
+    match err {
+        FunctionCallError::RespondToModel(msg) => {
+            FunctionCallError::RespondToModel(format_exec_output(&msg))
+        }
+        FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
+        other => other,
+    }
+}
+
+fn format_exec_output(content: &str) -> String {
    // Head+tail truncation for the model: show the beginning and end with an elision.
    // Clients still receive full streams; only this formatted summary is capped.
    let total_lines = content.lines().count();
    if content.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
        return content.to_string();
    }
+    let output = truncate_formatted_exec_output(content, total_lines);
+    format!("Total output lines: {total_lines}\n\n{output}")
+}

+fn truncate_formatted_exec_output(content: &str, total_lines: usize) -> String {
    let segments: Vec<&str> = content.split_inclusive('\n').collect();
    let head_take = MODEL_FORMAT_HEAD_LINES.min(segments.len());
    let tail_take = MODEL_FORMAT_TAIL_LINES.min(segments.len().saturating_sub(head_take));
@@ -285,32 +299,49 @@ fn truncate_formatted_exec_output(content: &str) -> String {
    result
 }

-fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
-    match err {
-        FunctionCallError::RespondToModel(msg) => {
-            FunctionCallError::RespondToModel(truncate_formatted_exec_output(&msg))
-        }
-        FunctionCallError::Fatal(msg) => {
-            FunctionCallError::Fatal(truncate_formatted_exec_output(&msg))
-        }
-        other => other,
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
+    use regex_lite::Regex;
+
+    fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
+        let pattern = truncated_message_pattern(line, total_lines);
+        let regex = Regex::new(&pattern).unwrap_or_else(|err| {
+            panic!("failed to compile regex {pattern}: {err}");
+        });
+        let captures = regex
+            .captures(message)
+            .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {message}"));
+        let body = captures
+            .name("body")
+            .expect("missing body capture")
+            .as_str();
+        assert!(
+            body.len() <= MODEL_FORMAT_MAX_BYTES,
+            "body exceeds byte limit: {} bytes",
+            body.len()
+        );
+    }
+
+    fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
+        let head_take = MODEL_FORMAT_HEAD_LINES.min(total_lines);
+        let tail_take = MODEL_FORMAT_TAIL_LINES.min(total_lines.saturating_sub(head_take));
+        let omitted = total_lines.saturating_sub(head_take + tail_take);
+        let escaped_line = regex_lite::escape(line);
+        format!(
+            r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
+        )
+    }

    #[test]
    fn truncate_formatted_exec_output_truncates_large_error() {
        let line = "very long execution error line that should trigger truncation\n";
        let large_error = line.repeat(2_500); // way beyond both byte and line limits

-        let truncated = truncate_formatted_exec_output(&large_error);
+        let truncated = format_exec_output(&large_error);

-        assert!(truncated.len() <= MODEL_FORMAT_MAX_BYTES);
-        assert!(truncated.starts_with(line));
-        assert!(truncated.contains("[... omitted"));
+        let total_lines = large_error.lines().count();
+        assert_truncated_message_matches(&truncated, line, total_lines);
        assert_ne!(truncated, large_error);
    }

@@ -318,13 +349,12 @@ mod tests {
    fn truncate_function_error_trims_respond_to_model() {
        let line = "respond-to-model error that should be truncated\n";
        let huge = line.repeat(3_000);
+        let total_lines = huge.lines().count();

        let err = truncate_function_error(FunctionCallError::RespondToModel(huge));
        match err {
            FunctionCallError::RespondToModel(message) => {
-                assert!(message.len() <= MODEL_FORMAT_MAX_BYTES);
-                assert!(message.contains("[... omitted"));
-                assert!(message.starts_with(line));
+                assert_truncated_message_matches(&message, line, total_lines);
            }
            other => panic!("unexpected error variant: {other:?}"),
        }
@@ -334,13 +364,12 @@ mod tests {
    fn truncate_function_error_trims_fatal() {
        let line = "fatal error output that should be truncated\n";
        let huge = line.repeat(3_000);
+        let total_lines = huge.lines().count();

        let err = truncate_function_error(FunctionCallError::Fatal(huge));
        match err {
            FunctionCallError::Fatal(message) => {
-                assert!(message.len() <= MODEL_FORMAT_MAX_BYTES);
-                assert!(message.contains("[... omitted"));
-                assert!(message.starts_with(line));
+                assert_truncated_message_matches(&message, line, total_lines);
            }
            other => panic!("unexpected error variant: {other:?}"),
        }
--- a/codex-rs/core/tests/suite/tools.rs
+++ b/codex-rs/core/tests/suite/tools.rs
@@ -438,11 +438,11 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {

        let stdout = output_json["output"].as_str().unwrap_or_default();
        assert!(
-            stdout.starts_with("command timed out after "),
+            stdout.contains("command timed out after "),
            "expected timeout prefix, got {stdout:?}"
        );
-        let first_line = stdout.lines().next().unwrap_or_default();
-        let duration_ms = first_line
+        let third_line = stdout.lines().nth(2).unwrap_or_default();
+        let duration_ms = third_line
            .strip_prefix("command timed out after ")
            .and_then(|line| line.strip_suffix(" milliseconds"))
            .and_then(|value| value.parse::<u64>().ok())
@@ -519,7 +519,7 @@ async fn shell_sandbox_denied_truncates_error_output() -> Result<()> {
        .expect("denied output string");

    assert!(
-        output.starts_with("failed in sandbox: "),
+        output.contains("failed in sandbox: "),
        "expected sandbox error prefix, got {output:?}"
    );
    assert!(
@@ -605,7 +605,7 @@ async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
        .expect("spawn failure output string");

    assert!(
-        output.starts_with("execution error:"),
+        output.contains("execution error:"),
        "expected execution error prefix, got {output:?}"
    );
    assert!(output.len() <= 10 * 1024);