Include output truncation message in tool call results (#2183)

This keeps the model from being confused by incomplete output.
2025-08-11 11:52:05 -07:00
parent b76a562c49
commit 0cf57e1f42
6 changed files with 146 additions and 54 deletions
--- a/codex-rs/core/tests/exec.rs
+++ b/codex-rs/core/tests/exec.rs
@@ -1,10 +1,11 @@
 #![cfg(target_os = "macos")]
-#![expect(clippy::expect_used)]
+#![expect(clippy::unwrap_used, clippy::expect_used)]

 use std::collections::HashMap;
 use std::sync::Arc;

 use codex_core::exec::ExecParams;
+use codex_core::exec::ExecToolCallOutput;
 use codex_core::exec::SandboxType;
 use codex_core::exec::process_exec_tool_call;
 use codex_core::protocol::SandboxPolicy;
@@ -12,14 +13,20 @@ use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
 use tempfile::TempDir;
 use tokio::sync::Notify;

+use codex_core::error::Result;
+
 use codex_core::get_platform_sandbox;

-async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) {
+fn skip_test() -> bool {
    if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
        eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
-        return;
+        return true;
    }

+    false
+}
+
+async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput> {
    let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
    assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);

@@ -35,31 +42,82 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) {
    let ctrl_c = Arc::new(Notify::new());
    let policy = SandboxPolicy::new_read_only_policy();

-    let result = process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await;
-
-    assert!(result.is_ok() == should_be_ok);
+    process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await
 }

 /// Command succeeds with exit code 0 normally
 #[tokio::test]
 async fn exit_code_0_succeeds() {
+    if skip_test() {
+        return;
+    }
+
    let tmp = TempDir::new().expect("should be able to create temp dir");
    let cmd = vec!["echo", "hello"];

-    run_test_cmd(tmp, cmd, true).await
+    let output = run_test_cmd(tmp, cmd).await.unwrap();
+    assert_eq!(output.stdout.text, "hello\n");
+    assert_eq!(output.stderr.text, "");
+    assert_eq!(output.stdout.truncated_after_lines, None);
+}
+
+/// Stdout of `seq 300` is cut to its first 256 lines, reported via `truncated_after_lines`
+#[tokio::test]
+async fn truncates_output_lines() {
+    if skip_test() {
+        return;
+    }
+
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let cmd = vec!["seq", "300"];
+
+    #[expect(clippy::unwrap_used)]
+    let output = run_test_cmd(tmp, cmd).await.unwrap();
+
+    let expected_output = (1..=256)
+        .map(|i| format!("{i}\n"))
+        .collect::<Vec<_>>()
+        .join("");
+    assert_eq!(output.stdout.text, expected_output);
+    assert_eq!(output.stdout.truncated_after_lines, Some(256));
+}
+
+/// Stdout over 10240 bytes is cut to 10240 bytes, with `truncated_after_lines` set to `Some(10)`
+#[tokio::test]
+async fn truncates_output_bytes() {
+    if skip_test() {
+        return;
+    }
+
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    // each line is 1001 bytes: the number left-justified to 1000 characters, plus a newline
+    let cmd = vec!["bash", "-lc", "seq 15 | awk '{printf \"%-1000s\\n\", $0}'"];
+
+    let output = run_test_cmd(tmp, cmd).await.unwrap();
+
+    assert_eq!(output.stdout.text.len(), 10240);
+    assert_eq!(output.stdout.truncated_after_lines, Some(10));
 }

 /// Command not found returns exit code 127, this is not considered a sandbox error
 #[tokio::test]
 async fn exit_command_not_found_is_ok() {
+    if skip_test() {
+        return;
+    }
+
    let tmp = TempDir::new().expect("should be able to create temp dir");
    let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"];
-    run_test_cmd(tmp, cmd, true).await
+    run_test_cmd(tmp, cmd).await.unwrap();
 }

 /// Writing a file fails and should be considered a sandbox error
 #[tokio::test]
 async fn write_file_fails_as_sandbox_error() {
+    if skip_test() {
+        return;
+    }
+
    let tmp = TempDir::new().expect("should be able to create temp dir");
    let path = tmp.path().join("test.txt");
    let cmd = vec![
@@ -67,5 +125,5 @@ async fn write_file_fails_as_sandbox_error() {
        path.to_str().expect("should be able to get path"),
    ];

-    run_test_cmd(tmp, cmd, false).await;
+    assert!(run_test_cmd(tmp, cmd).await.is_err());
 }
--- a/codex-rs/core/tests/exec_stream_events.rs
+++ b/codex-rs/core/tests/exec_stream_events.rs
@@ -76,7 +76,7 @@ async fn test_exec_stdout_stream_events_echo() {
    };

    assert_eq!(result.exit_code, 0);
-    assert_eq!(result.stdout, "hello-world\n");
+    assert_eq!(result.stdout.text, "hello-world\n");

    let streamed = collect_stdout_events(rx);
    // We should have received at least the same contents (possibly in one chunk)
@@ -128,8 +128,8 @@ async fn test_exec_stderr_stream_events_echo() {
    };

    assert_eq!(result.exit_code, 0);
-    assert_eq!(result.stdout, "");
-    assert_eq!(result.stderr, "oops\n");
+    assert_eq!(result.stdout.text, "");
+    assert_eq!(result.stderr.text, "oops\n");

    // Collect only stderr delta events
    let mut err = Vec::new();