From 0cf57e1f42b54fb1ed9e6af6fdfd18da106e4298 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Mon, 11 Aug 2025 11:52:05 -0700 Subject: [PATCH] Include output truncation message in tool call results (#2183) To avoid model being confused about incomplete output. --- codex-rs/core/src/codex.rs | 58 ++++++++--------- codex-rs/core/src/exec.rs | 52 +++++++++++++--- codex-rs/core/src/shell.rs | 2 +- codex-rs/core/tests/exec.rs | 76 ++++++++++++++++++++--- codex-rs/core/tests/exec_stream_events.rs | 6 +- codex-rs/linux-sandbox/tests/landlock.rs | 6 +- 6 files changed, 146 insertions(+), 54 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 3fcac51b..2dd48bf3 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -51,6 +51,7 @@ use crate::exec::ExecParams; use crate::exec::ExecToolCallOutput; use crate::exec::SandboxType; use crate::exec::StdoutStream; +use crate::exec::StreamOutput; use crate::exec::process_exec_tool_call; use crate::exec_env::create_env; use crate::mcp_connection_manager::McpConnectionManager; @@ -431,8 +432,8 @@ impl Session { // Because stdout and stderr could each be up to 100 KiB, we send // truncated versions. const MAX_STREAM_OUTPUT: usize = 5 * 1024; // 5KiB - let stdout = stdout.chars().take(MAX_STREAM_OUTPUT).collect(); - let stderr = stderr.chars().take(MAX_STREAM_OUTPUT).collect(); + let stdout = stdout.text.chars().take(MAX_STREAM_OUTPUT).collect(); + let stderr = stderr.text.chars().take(MAX_STREAM_OUTPUT).collect(); let msg = if is_apply_patch { EventMsg::PatchApplyEnd(PatchApplyEndEvent { @@ -504,8 +505,8 @@ impl Session { Err(e) => { output_stderr = ExecToolCallOutput { exit_code: -1, - stdout: String::new(), - stderr: get_error_message_ui(e), + stdout: StreamOutput::new(String::new()), + stderr: StreamOutput::new(get_error_message_ui(e)), duration: Duration::default(), }; &output_stderr @@ -1977,19 +1978,10 @@ async fn handle_container_exec_with_params( match output_result { Ok(output) => { - let ExecToolCallOutput { - exit_code, - stdout, - stderr, - duration, - } = &output; + let ExecToolCallOutput { exit_code, .. } = &output; let is_success = *exit_code == 0; - let content = format_exec_output( - if is_success { stdout } else { stderr }, - *exit_code, - *duration, - ); + let content = format_exec_output(&output); ResponseInputItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { @@ -2118,19 +2110,10 @@ async fn handle_sandbox_error( match retry_output_result { Ok(retry_output) => { - let ExecToolCallOutput { - exit_code, - stdout, - stderr, - duration, - } = &retry_output; + let ExecToolCallOutput { exit_code, .. } = &retry_output; let is_success = *exit_code == 0; - let content = format_exec_output( - if is_success { stdout } else { stderr }, - *exit_code, - *duration, - ); + let content = format_exec_output(&retry_output); ResponseInputItem::FunctionCallOutput { call_id: call_id.clone(), @@ -2163,7 +2146,14 @@ async fn handle_sandbox_error( } /// Exec output is a pre-serialized JSON payload -fn format_exec_output(output: &str, exit_code: i32, duration: Duration) -> String { +fn format_exec_output(exec_output: &ExecToolCallOutput) -> String { + let ExecToolCallOutput { + exit_code, + stdout, + stderr, + duration, + } = exec_output; + #[derive(Serialize)] struct ExecMetadata { exit_code: i32, @@ -2179,10 +2169,20 @@ fn format_exec_output(output: &str, exit_code: i32, duration: Duration) -> Strin // round to 1 decimal place let duration_seconds = ((duration.as_secs_f32()) * 10.0).round() / 10.0; + let is_success = *exit_code == 0; + let output = if is_success { stdout } else { stderr }; + + let mut formatted_output = output.text.clone(); + if let Some(truncated_after_lines) = output.truncated_after_lines { + formatted_output.push_str(&format!( + "\n\n[Output truncated after {truncated_after_lines} lines: too many lines or bytes.]", + )); + } + let payload = ExecOutput { - output, + output: &formatted_output, metadata: ExecMetadata { - exit_code, + exit_code: *exit_code, duration_seconds, }, }; diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 10606b68..c964466f 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -130,8 +130,8 @@ pub async fn process_exec_tool_call( let duration = start.elapsed(); match raw_output_result { Ok(raw_output) => { - let stdout = String::from_utf8_lossy(&raw_output.stdout).to_string(); - let stderr = String::from_utf8_lossy(&raw_output.stderr).to_string(); + let stdout = raw_output.stdout.from_utf8_lossy(); + let stderr = raw_output.stderr.from_utf8_lossy(); #[cfg(target_family = "unix")] match raw_output.exit_status.signal() { @@ -146,7 +146,9 @@ pub async fn process_exec_tool_call( if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) { return Err(CodexErr::Sandbox(SandboxErr::Denied( - exit_code, stdout, stderr, + exit_code, + stdout.text, + stderr.text, ))); } @@ -243,18 +245,41 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool { true } +#[derive(Debug)] +pub struct StreamOutput { + pub text: T, + pub truncated_after_lines: Option, +} #[derive(Debug)] pub struct RawExecToolCallOutput { pub exit_status: ExitStatus, - pub stdout: Vec, - pub stderr: Vec, + pub stdout: StreamOutput>, + pub stderr: StreamOutput>, +} + +impl StreamOutput { + pub fn new(text: String) -> Self { + Self { + text, + truncated_after_lines: None, + } + } +} + +impl StreamOutput> { + pub fn from_utf8_lossy(&self) -> StreamOutput { + StreamOutput { + text: String::from_utf8_lossy(&self.text).to_string(), + truncated_after_lines: self.truncated_after_lines, + } + } } #[derive(Debug)] pub struct ExecToolCallOutput { pub exit_code: i32, - pub stdout: String, - pub stderr: String, + pub stdout: StreamOutput, + pub stderr: StreamOutput, pub duration: Duration, } @@ -363,7 +388,7 @@ async fn read_capped( max_lines: usize, stream: Option, is_stderr: bool, -) -> io::Result> { +) -> io::Result>> { let mut buf = Vec::with_capacity(max_output.min(8 * 1024)); let mut tmp = [0u8; 8192]; @@ -413,7 +438,16 @@ async fn read_capped( // Continue reading to EOF to avoid back-pressure, but discard once caps are hit. } - Ok(buf) + let truncated = remaining_lines == 0 || remaining_bytes == 0; + + Ok(StreamOutput { + text: buf, + truncated_after_lines: if truncated { + Some((max_lines - remaining_lines) as u32) + } else { + None + }, + }) } #[cfg(unix)] diff --git a/codex-rs/core/src/shell.rs b/codex-rs/core/src/shell.rs index de0764f7..cc58eb74 100644 --- a/codex-rs/core/src/shell.rs +++ b/codex-rs/core/src/shell.rs @@ -230,7 +230,7 @@ mod tests { assert_eq!(output.exit_code, 0, "input: {input:?} output: {output:?}"); if let Some(expected) = expected_output { assert_eq!( - output.stdout, expected, + output.stdout.text, expected, "input: {input:?} output: {output:?}" ); } diff --git a/codex-rs/core/tests/exec.rs b/codex-rs/core/tests/exec.rs index f1b9e78e..9bead7ef 100644 --- a/codex-rs/core/tests/exec.rs +++ b/codex-rs/core/tests/exec.rs @@ -1,10 +1,11 @@ #![cfg(target_os = "macos")] -#![expect(clippy::expect_used)] +#![expect(clippy::unwrap_used, clippy::expect_used)] use std::collections::HashMap; use std::sync::Arc; use codex_core::exec::ExecParams; +use codex_core::exec::ExecToolCallOutput; use codex_core::exec::SandboxType; use codex_core::exec::process_exec_tool_call; use codex_core::protocol::SandboxPolicy; @@ -12,14 +13,20 @@ use codex_core::spawn::CODEX_SANDBOX_ENV_VAR; use tempfile::TempDir; use tokio::sync::Notify; +use codex_core::error::Result; + use codex_core::get_platform_sandbox; -async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) { +fn skip_test() -> bool { if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) { eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test."); - return; + return true; } + false +} + +async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result { let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type"); assert_eq!(sandbox_type, SandboxType::MacosSeatbelt); @@ -35,31 +42,82 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) { let ctrl_c = Arc::new(Notify::new()); let policy = SandboxPolicy::new_read_only_policy(); - let result = process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await; - - assert!(result.is_ok() == should_be_ok); + process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await } /// Command succeeds with exit code 0 normally #[tokio::test] async fn exit_code_0_succeeds() { + if skip_test() { + return; + } + let tmp = TempDir::new().expect("should be able to create temp dir"); let cmd = vec!["echo", "hello"]; - run_test_cmd(tmp, cmd, true).await + let output = run_test_cmd(tmp, cmd).await.unwrap(); + assert_eq!(output.stdout.text, "hello\n"); + assert_eq!(output.stderr.text, ""); + assert_eq!(output.stdout.truncated_after_lines, None); +} + +/// Command succeeds with exit code 0 normally +#[tokio::test] +async fn truncates_output_lines() { + if skip_test() { + return; + } + + let tmp = TempDir::new().expect("should be able to create temp dir"); + let cmd = vec!["seq", "300"]; + + #[expect(clippy::unwrap_used)] + let output = run_test_cmd(tmp, cmd).await.unwrap(); + + let expected_output = (1..=256) + .map(|i| format!("{i}\n")) + .collect::>() + .join(""); + assert_eq!(output.stdout.text, expected_output); + assert_eq!(output.stdout.truncated_after_lines, Some(256)); +} + +/// Command succeeds with exit code 0 normally +#[tokio::test] +async fn truncates_output_bytes() { + if skip_test() { + return; + } + + let tmp = TempDir::new().expect("should be able to create temp dir"); + // each line is 1000 bytes + let cmd = vec!["bash", "-lc", "seq 15 | awk '{printf \"%-1000s\\n\", $0}'"]; + + let output = run_test_cmd(tmp, cmd).await.unwrap(); + + assert_eq!(output.stdout.text.len(), 10240); + assert_eq!(output.stdout.truncated_after_lines, Some(10)); } /// Command not found returns exit code 127, this is not considered a sandbox error #[tokio::test] async fn exit_command_not_found_is_ok() { + if skip_test() { + return; + } + let tmp = TempDir::new().expect("should be able to create temp dir"); let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"]; - run_test_cmd(tmp, cmd, true).await + run_test_cmd(tmp, cmd).await.unwrap(); } /// Writing a file fails and should be considered a sandbox error #[tokio::test] async fn write_file_fails_as_sandbox_error() { + if skip_test() { + return; + } + let tmp = TempDir::new().expect("should be able to create temp dir"); let path = tmp.path().join("test.txt"); let cmd = vec![ @@ -67,5 +125,5 @@ async fn write_file_fails_as_sandbox_error() { path.to_str().expect("should be able to get path"), ]; - run_test_cmd(tmp, cmd, false).await; + assert!(run_test_cmd(tmp, cmd).await.is_err()); } diff --git a/codex-rs/core/tests/exec_stream_events.rs b/codex-rs/core/tests/exec_stream_events.rs index 534b2551..36632afd 100644 --- a/codex-rs/core/tests/exec_stream_events.rs +++ b/codex-rs/core/tests/exec_stream_events.rs @@ -76,7 +76,7 @@ async fn test_exec_stdout_stream_events_echo() { }; assert_eq!(result.exit_code, 0); - assert_eq!(result.stdout, "hello-world\n"); + assert_eq!(result.stdout.text, "hello-world\n"); let streamed = collect_stdout_events(rx); // We should have received at least the same contents (possibly in one chunk) @@ -128,8 +128,8 @@ async fn test_exec_stderr_stream_events_echo() { }; assert_eq!(result.exit_code, 0); - assert_eq!(result.stdout, ""); - assert_eq!(result.stderr, "oops\n"); + assert_eq!(result.stdout.text, ""); + assert_eq!(result.stderr.text, "oops\n"); // Collect only stderr delta events let mut err = Vec::new(); diff --git a/codex-rs/linux-sandbox/tests/landlock.rs b/codex-rs/linux-sandbox/tests/landlock.rs index 96298c65..c7081dbc 100644 --- a/codex-rs/linux-sandbox/tests/landlock.rs +++ b/codex-rs/linux-sandbox/tests/landlock.rs @@ -72,8 +72,8 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) { .unwrap(); if res.exit_code != 0 { - println!("stdout:\n{}", res.stdout); - println!("stderr:\n{}", res.stderr); + println!("stdout:\n{}", res.stdout.text); + println!("stderr:\n{}", res.stderr.text); panic!("exit code: {}", res.exit_code); } } @@ -164,7 +164,7 @@ async fn assert_network_blocked(cmd: &[&str]) { .await; let (exit_code, stdout, stderr) = match result { - Ok(output) => (output.exit_code, output.stdout, output.stderr), + Ok(output) => (output.exit_code, output.stdout.text, output.stderr.text), Err(CodexErr::Sandbox(SandboxErr::Denied(exit_code, stdout, stderr))) => { (exit_code, stdout, stderr) }