From 0cf57e1f42b54fb1ed9e6af6fdfd18da106e4298 Mon Sep 17 00:00:00 2001
From: pakrym-oai <pakrym@openai.com>
Date: Mon, 11 Aug 2025 11:52:05 -0700
Subject: [PATCH] Include output truncation message in tool call results
 (#2183)

To avoid model being confused about incomplete output.
---
 codex-rs/core/src/codex.rs                | 58 ++++++++---------
 codex-rs/core/src/exec.rs                 | 52 +++++++++++++---
 codex-rs/core/src/shell.rs                |  2 +-
 codex-rs/core/tests/exec.rs               | 76 ++++++++++++++++++++---
 codex-rs/core/tests/exec_stream_events.rs |  6 +-
 codex-rs/linux-sandbox/tests/landlock.rs  |  6 +-
 6 files changed, 146 insertions(+), 54 deletions(-)

diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 3fcac51b..2dd48bf3 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -51,6 +51,7 @@ use crate::exec::ExecParams;
 use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::StdoutStream;
+use crate::exec::StreamOutput;
 use crate::exec::process_exec_tool_call;
 use crate::exec_env::create_env;
 use crate::mcp_connection_manager::McpConnectionManager;
@@ -431,8 +432,8 @@ impl Session {
         // Because stdout and stderr could each be up to 100 KiB, we send
         // truncated versions.
         const MAX_STREAM_OUTPUT: usize = 5 * 1024; // 5KiB
-        let stdout = stdout.chars().take(MAX_STREAM_OUTPUT).collect();
-        let stderr = stderr.chars().take(MAX_STREAM_OUTPUT).collect();
+        let stdout = stdout.text.chars().take(MAX_STREAM_OUTPUT).collect();
+        let stderr = stderr.text.chars().take(MAX_STREAM_OUTPUT).collect();
 
         let msg = if is_apply_patch {
             EventMsg::PatchApplyEnd(PatchApplyEndEvent {
@@ -504,8 +505,8 @@ impl Session {
             Err(e) => {
                 output_stderr = ExecToolCallOutput {
                     exit_code: -1,
-                    stdout: String::new(),
-                    stderr: get_error_message_ui(e),
+                    stdout: StreamOutput::new(String::new()),
+                    stderr: StreamOutput::new(get_error_message_ui(e)),
                     duration: Duration::default(),
                 };
                 &output_stderr
@@ -1977,19 +1978,10 @@ async fn handle_container_exec_with_params(
 
     match output_result {
         Ok(output) => {
-            let ExecToolCallOutput {
-                exit_code,
-                stdout,
-                stderr,
-                duration,
-            } = &output;
+            let ExecToolCallOutput { exit_code, .. } = &output;
 
             let is_success = *exit_code == 0;
-            let content = format_exec_output(
-                if is_success { stdout } else { stderr },
-                *exit_code,
-                *duration,
-            );
+            let content = format_exec_output(&output);
             ResponseInputItem::FunctionCallOutput {
                 call_id: call_id.clone(),
                 output: FunctionCallOutputPayload {
@@ -2118,19 +2110,10 @@ async fn handle_sandbox_error(
 
             match retry_output_result {
                 Ok(retry_output) => {
-                    let ExecToolCallOutput {
-                        exit_code,
-                        stdout,
-                        stderr,
-                        duration,
-                    } = &retry_output;
+                    let ExecToolCallOutput { exit_code, .. } = &retry_output;
 
                     let is_success = *exit_code == 0;
-                    let content = format_exec_output(
-                        if is_success { stdout } else { stderr },
-                        *exit_code,
-                        *duration,
-                    );
+                    let content = format_exec_output(&retry_output);
 
                     ResponseInputItem::FunctionCallOutput {
                         call_id: call_id.clone(),
@@ -2163,7 +2146,14 @@ async fn handle_sandbox_error(
 }
 
 /// Exec output is a pre-serialized JSON payload
-fn format_exec_output(output: &str, exit_code: i32, duration: Duration) -> String {
+fn format_exec_output(exec_output: &ExecToolCallOutput) -> String {
+    let ExecToolCallOutput {
+        exit_code,
+        stdout,
+        stderr,
+        duration,
+    } = exec_output;
+
     #[derive(Serialize)]
     struct ExecMetadata {
         exit_code: i32,
@@ -2179,10 +2169,20 @@ fn format_exec_output(output: &str, exit_code: i32, duration: Duration) -> Strin
     // round to 1 decimal place
     let duration_seconds = ((duration.as_secs_f32()) * 10.0).round() / 10.0;
 
+    let is_success = *exit_code == 0;
+    let output = if is_success { stdout } else { stderr };
+
+    let mut formatted_output = output.text.clone();
+    if let Some(truncated_after_lines) = output.truncated_after_lines {
+        formatted_output.push_str(&format!(
+            "\n\n[Output truncated after {truncated_after_lines} lines: too many lines or bytes.]",
+        ));
+    }
+
     let payload = ExecOutput {
-        output,
+        output: &formatted_output,
         metadata: ExecMetadata {
-            exit_code,
+            exit_code: *exit_code,
             duration_seconds,
         },
     };
diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs
index 10606b68..c964466f 100644
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -130,8 +130,8 @@ pub async fn process_exec_tool_call(
     let duration = start.elapsed();
     match raw_output_result {
         Ok(raw_output) => {
-            let stdout = String::from_utf8_lossy(&raw_output.stdout).to_string();
-            let stderr = String::from_utf8_lossy(&raw_output.stderr).to_string();
+            let stdout = raw_output.stdout.from_utf8_lossy();
+            let stderr = raw_output.stderr.from_utf8_lossy();
 
             #[cfg(target_family = "unix")]
             match raw_output.exit_status.signal() {
@@ -146,7 +146,9 @@ pub async fn process_exec_tool_call(
 
             if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
                 return Err(CodexErr::Sandbox(SandboxErr::Denied(
-                    exit_code, stdout, stderr,
+                    exit_code,
+                    stdout.text,
+                    stderr.text,
                 )));
             }
 
@@ -243,18 +245,41 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
     true
 }
 
+#[derive(Debug)]
+pub struct StreamOutput<T> {
+    pub text: T,
+    pub truncated_after_lines: Option<u32>,
+}
 #[derive(Debug)]
 pub struct RawExecToolCallOutput {
     pub exit_status: ExitStatus,
-    pub stdout: Vec<u8>,
-    pub stderr: Vec<u8>,
+    pub stdout: StreamOutput<Vec<u8>>,
+    pub stderr: StreamOutput<Vec<u8>>,
+}
+
+impl StreamOutput<String> {
+    pub fn new(text: String) -> Self {
+        Self {
+            text,
+            truncated_after_lines: None,
+        }
+    }
+}
+
+impl StreamOutput<Vec<u8>> {
+    pub fn from_utf8_lossy(&self) -> StreamOutput<String> {
+        StreamOutput {
+            text: String::from_utf8_lossy(&self.text).to_string(),
+            truncated_after_lines: self.truncated_after_lines,
+        }
+    }
 }
 
 #[derive(Debug)]
 pub struct ExecToolCallOutput {
     pub exit_code: i32,
-    pub stdout: String,
-    pub stderr: String,
+    pub stdout: StreamOutput<String>,
+    pub stderr: StreamOutput<String>,
     pub duration: Duration,
 }
 
@@ -363,7 +388,7 @@ async fn read_capped<R: AsyncRead + Unpin + Send + 'static>(
     max_lines: usize,
     stream: Option<StdoutStream>,
     is_stderr: bool,
-) -> io::Result<Vec<u8>> {
+) -> io::Result<StreamOutput<Vec<u8>>> {
     let mut buf = Vec::with_capacity(max_output.min(8 * 1024));
     let mut tmp = [0u8; 8192];
 
@@ -413,7 +438,16 @@ async fn read_capped<R: AsyncRead + Unpin + Send + 'static>(
         // Continue reading to EOF to avoid back-pressure, but discard once caps are hit.
     }
 
-    Ok(buf)
+    let truncated = remaining_lines == 0 || remaining_bytes == 0;
+
+    Ok(StreamOutput {
+        text: buf,
+        truncated_after_lines: if truncated {
+            Some((max_lines - remaining_lines) as u32)
+        } else {
+            None
+        },
+    })
 }
 
 #[cfg(unix)]
diff --git a/codex-rs/core/src/shell.rs b/codex-rs/core/src/shell.rs
index de0764f7..cc58eb74 100644
--- a/codex-rs/core/src/shell.rs
+++ b/codex-rs/core/src/shell.rs
@@ -230,7 +230,7 @@ mod tests {
             assert_eq!(output.exit_code, 0, "input: {input:?} output: {output:?}");
             if let Some(expected) = expected_output {
                 assert_eq!(
-                    output.stdout, expected,
+                    output.stdout.text, expected,
                     "input: {input:?} output: {output:?}"
                 );
             }
diff --git a/codex-rs/core/tests/exec.rs b/codex-rs/core/tests/exec.rs
index f1b9e78e..9bead7ef 100644
--- a/codex-rs/core/tests/exec.rs
+++ b/codex-rs/core/tests/exec.rs
@@ -1,10 +1,11 @@
 #![cfg(target_os = "macos")]
-#![expect(clippy::expect_used)]
+#![expect(clippy::unwrap_used, clippy::expect_used)]
 
 use std::collections::HashMap;
 use std::sync::Arc;
 
 use codex_core::exec::ExecParams;
+use codex_core::exec::ExecToolCallOutput;
 use codex_core::exec::SandboxType;
 use codex_core::exec::process_exec_tool_call;
 use codex_core::protocol::SandboxPolicy;
@@ -12,14 +13,20 @@ use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
 use tempfile::TempDir;
 use tokio::sync::Notify;
 
+use codex_core::error::Result;
+
 use codex_core::get_platform_sandbox;
 
-async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) {
+fn skip_test() -> bool {
     if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
         eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
-        return;
+        return true;
     }
 
+    false
+}
+
+async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput> {
     let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
     assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
 
@@ -35,31 +42,82 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) {
     let ctrl_c = Arc::new(Notify::new());
     let policy = SandboxPolicy::new_read_only_policy();
 
-    let result = process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await;
-
-    assert!(result.is_ok() == should_be_ok);
+    process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await
 }
 
 /// Command succeeds with exit code 0 normally
 #[tokio::test]
 async fn exit_code_0_succeeds() {
+    if skip_test() {
+        return;
+    }
+
     let tmp = TempDir::new().expect("should be able to create temp dir");
     let cmd = vec!["echo", "hello"];
 
-    run_test_cmd(tmp, cmd, true).await
+    let output = run_test_cmd(tmp, cmd).await.unwrap();
+    assert_eq!(output.stdout.text, "hello\n");
+    assert_eq!(output.stderr.text, "");
+    assert_eq!(output.stdout.truncated_after_lines, None);
+}
+
+/// Command succeeds with exit code 0 normally
+#[tokio::test]
+async fn truncates_output_lines() {
+    if skip_test() {
+        return;
+    }
+
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let cmd = vec!["seq", "300"];
+
+    #[expect(clippy::unwrap_used)]
+    let output = run_test_cmd(tmp, cmd).await.unwrap();
+
+    let expected_output = (1..=256)
+        .map(|i| format!("{i}\n"))
+        .collect::<Vec<_>>()
+        .join("");
+    assert_eq!(output.stdout.text, expected_output);
+    assert_eq!(output.stdout.truncated_after_lines, Some(256));
+}
+
+/// Command succeeds with exit code 0 normally
+#[tokio::test]
+async fn truncates_output_bytes() {
+    if skip_test() {
+        return;
+    }
+
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    // each line is 1000 bytes
+    let cmd = vec!["bash", "-lc", "seq 15 | awk '{printf \"%-1000s\\n\", $0}'"];
+
+    let output = run_test_cmd(tmp, cmd).await.unwrap();
+
+    assert_eq!(output.stdout.text.len(), 10240);
+    assert_eq!(output.stdout.truncated_after_lines, Some(10));
 }
 
 /// Command not found returns exit code 127, this is not considered a sandbox error
 #[tokio::test]
 async fn exit_command_not_found_is_ok() {
+    if skip_test() {
+        return;
+    }
+
     let tmp = TempDir::new().expect("should be able to create temp dir");
     let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"];
-    run_test_cmd(tmp, cmd, true).await
+    run_test_cmd(tmp, cmd).await.unwrap();
 }
 
 /// Writing a file fails and should be considered a sandbox error
 #[tokio::test]
 async fn write_file_fails_as_sandbox_error() {
+    if skip_test() {
+        return;
+    }
+
     let tmp = TempDir::new().expect("should be able to create temp dir");
     let path = tmp.path().join("test.txt");
     let cmd = vec![
@@ -67,5 +125,5 @@ async fn write_file_fails_as_sandbox_error() {
         path.to_str().expect("should be able to get path"),
     ];
 
-    run_test_cmd(tmp, cmd, false).await;
+    assert!(run_test_cmd(tmp, cmd).await.is_err());
 }
diff --git a/codex-rs/core/tests/exec_stream_events.rs b/codex-rs/core/tests/exec_stream_events.rs
index 534b2551..36632afd 100644
--- a/codex-rs/core/tests/exec_stream_events.rs
+++ b/codex-rs/core/tests/exec_stream_events.rs
@@ -76,7 +76,7 @@ async fn test_exec_stdout_stream_events_echo() {
     };
 
     assert_eq!(result.exit_code, 0);
-    assert_eq!(result.stdout, "hello-world\n");
+    assert_eq!(result.stdout.text, "hello-world\n");
 
     let streamed = collect_stdout_events(rx);
     // We should have received at least the same contents (possibly in one chunk)
@@ -128,8 +128,8 @@ async fn test_exec_stderr_stream_events_echo() {
     };
 
     assert_eq!(result.exit_code, 0);
-    assert_eq!(result.stdout, "");
-    assert_eq!(result.stderr, "oops\n");
+    assert_eq!(result.stdout.text, "");
+    assert_eq!(result.stderr.text, "oops\n");
 
     // Collect only stderr delta events
     let mut err = Vec::new();
diff --git a/codex-rs/linux-sandbox/tests/landlock.rs b/codex-rs/linux-sandbox/tests/landlock.rs
index 96298c65..c7081dbc 100644
--- a/codex-rs/linux-sandbox/tests/landlock.rs
+++ b/codex-rs/linux-sandbox/tests/landlock.rs
@@ -72,8 +72,8 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
     .unwrap();
 
     if res.exit_code != 0 {
-        println!("stdout:\n{}", res.stdout);
-        println!("stderr:\n{}", res.stderr);
+        println!("stdout:\n{}", res.stdout.text);
+        println!("stderr:\n{}", res.stderr.text);
         panic!("exit code: {}", res.exit_code);
     }
 }
@@ -164,7 +164,7 @@ async fn assert_network_blocked(cmd: &[&str]) {
     .await;
 
     let (exit_code, stdout, stderr) = match result {
-        Ok(output) => (output.exit_code, output.stdout, output.stderr),
+        Ok(output) => (output.exit_code, output.stdout.text, output.stderr.text),
         Err(CodexErr::Sandbox(SandboxErr::Denied(exit_code, stdout, stderr))) => {
             (exit_code, stdout, stderr)
         }