Include command output when sending timeout to model (#3576)

Being able to see the output helps the model decide how to handle the timeout.
2025-09-14 14:38:26 -07:00
parent 7e1543f5d8
commit 863d9c237e
5 changed files with 147 additions and 50 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -845,6 +845,7 @@ impl Session {
            aggregated_output,
            duration,
            exit_code,
+            timed_out: _,
        } = output;
        // Send full stdout/stderr to clients; do not truncate.
        let stdout = stdout.text.clone();
@@ -920,6 +921,7 @@ impl Session {
        let output_stderr;
        let borrowed: &ExecToolCallOutput = match &result {
            Ok(output) => output,
+            Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output,
            Err(e) => {
                output_stderr = ExecToolCallOutput {
                    exit_code: -1,
@@ -927,6 +929,7 @@ impl Session {
                    stderr: StreamOutput::new(get_error_message_ui(e)),
                    aggregated_output: StreamOutput::new(get_error_message_ui(e)),
                    duration: Duration::default(),
+                    timed_out: false,
                };
                &output_stderr
            }
@@ -2917,15 +2920,12 @@ async fn handle_sandbox_error(
    let sub_id = exec_command_context.sub_id.clone();
    let cwd = exec_command_context.cwd.clone();

-    // if the command timed out, we can simply return this failure to the model
-    if matches!(error, SandboxErr::Timeout) {
+    if let SandboxErr::Timeout { output } = &error {
+        let content = format_exec_output(output);
        return ResponseInputItem::FunctionCallOutput {
            call_id,
            output: FunctionCallOutputPayload {
-                content: format!(
-                    "command timed out after {} milliseconds",
-                    params.timeout_duration().as_millis()
-                ),
+                content,
                success: Some(false),
            },
        };
@@ -3050,7 +3050,17 @@ fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
    // Head+tail truncation for the model: show the beginning and end with an elision.
    // Clients still receive full streams; only this formatted summary is capped.

-    let s = aggregated_output.text.as_str();
+    let mut s = &aggregated_output.text;
+    let prefixed_str: String;
+
+    if exec_output.timed_out {
+        prefixed_str = format!(
+            "command timed out after {} milliseconds\n",
+            exec_output.duration.as_millis()
+        ) + s;
+        s = &prefixed_str;
+    }
+
    let total_lines = s.lines().count();
    if s.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
        return s.to_string();
@@ -3093,6 +3103,7 @@ fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
    // Build final string respecting byte budgets
    let head_part = take_bytes_at_char_boundary(&head_lines_text, head_budget);
    let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len()));
+
    result.push_str(head_part);
    result.push_str(&marker);

@@ -3342,6 +3353,7 @@ mod tests {
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new(full),
            duration: StdDuration::from_secs(1),
+            timed_out: false,
        };

        let out = format_exec_output_str(&exec);
@@ -3384,6 +3396,7 @@ mod tests {
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new(full.clone()),
            duration: StdDuration::from_secs(1),
+            timed_out: false,
        };

        let out = format_exec_output_str(&exec);
@@ -3406,6 +3419,25 @@ mod tests {
        );
    }

+    #[test]
+    fn includes_timed_out_message() {
+        let exec = ExecToolCallOutput {
+            exit_code: 0,
+            stdout: StreamOutput::new(String::new()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new("Command output".to_string()),
+            duration: StdDuration::from_secs(1),
+            timed_out: true,
+        };
+
+        let out = format_exec_output_str(&exec);
+
+        assert_eq!(
+            out,
+            "command timed out after 1000 milliseconds\nCommand output"
+        );
+    }
+
    #[test]
    fn falls_back_to_content_when_structured_is_null() {
        let ctr = CallToolResult {
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -1,3 +1,4 @@
+use crate::exec::ExecToolCallOutput;
 use crate::token_data::KnownPlan;
 use crate::token_data::PlanType;
 use codex_protocol::mcp_protocol::ConversationId;
@@ -13,8 +14,11 @@ pub type Result<T> = std::result::Result<T, CodexErr>;
 #[derive(Error, Debug)]
 pub enum SandboxErr {
    /// Error from sandbox execution
-    #[error("sandbox denied exec error, exit code: {0}, stdout: {1}, stderr: {2}")]
-    Denied(i32, String, String),
+    #[error(
+        "sandbox denied exec error, exit code: {}, stdout: {}, stderr: {}",
+        .output.exit_code, .output.stdout.text, .output.stderr.text
+    )]
+    Denied { output: Box<ExecToolCallOutput> },

    /// Error from linux seccomp filter setup
    #[cfg(target_os = "linux")]
@@ -28,7 +32,7 @@ pub enum SandboxErr {

    /// Command timed out
    #[error("command timed out")]
-    Timeout,
+    Timeout { output: Box<ExecToolCallOutput> },

    /// Command was killed by a signal
    #[error("command was killed by a signal")]
@@ -245,9 +249,12 @@ impl CodexErr {

 pub fn get_error_message_ui(e: &CodexErr) -> String {
    match e {
-        CodexErr::Sandbox(SandboxErr::Denied(_, _, stderr)) => stderr.to_string(),
+        CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(),
        // Timeouts are not sandbox errors from a UX perspective; present them plainly
-        CodexErr::Sandbox(SandboxErr::Timeout) => "error: command timed out".to_string(),
+        CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!(
+            "error: command timed out after {} ms",
+            output.duration.as_millis()
+        ),
        _ => e.to_string(),
    }
 }
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -34,6 +34,7 @@ const DEFAULT_TIMEOUT_MS: u64 = 10_000;
 const SIGKILL_CODE: i32 = 9;
 const TIMEOUT_CODE: i32 = 64;
 const EXIT_CODE_SIGNAL_BASE: i32 = 128; // conventional shell: 128 + signal
+const EXEC_TIMEOUT_EXIT_CODE: i32 = 124; // conventional timeout exit code

 // I/O buffer sizing
 const READ_CHUNK_SIZE: usize = 8192; // bytes per read
@@ -86,11 +87,12 @@ pub async fn process_exec_tool_call(
 ) -> Result<ExecToolCallOutput> {
    let start = Instant::now();

+    let timeout_duration = params.timeout_duration();
+
    let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type
    {
        SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await,
        SandboxType::MacosSeatbelt => {
-            let timeout = params.timeout_duration();
            let ExecParams {
                command, cwd, env, ..
            } = params;
@@ -102,10 +104,9 @@ pub async fn process_exec_tool_call(
                env,
            )
            .await?;
-            consume_truncated_output(child, timeout, stdout_stream.clone()).await
+            consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await
        }
        SandboxType::LinuxSeccomp => {
-            let timeout = params.timeout_duration();
            let ExecParams {
                command, cwd, env, ..
            } = params;
@@ -123,41 +124,56 @@ pub async fn process_exec_tool_call(
            )
            .await?;

-            consume_truncated_output(child, timeout, stdout_stream).await
+            consume_truncated_output(child, timeout_duration, stdout_stream).await
        }
    };
    let duration = start.elapsed();
    match raw_output_result {
        Ok(raw_output) => {
-            let stdout = raw_output.stdout.from_utf8_lossy();
-            let stderr = raw_output.stderr.from_utf8_lossy();
+            #[allow(unused_mut)]
+            let mut timed_out = raw_output.timed_out;

            #[cfg(target_family = "unix")]
-            match raw_output.exit_status.signal() {
-                Some(TIMEOUT_CODE) => return Err(CodexErr::Sandbox(SandboxErr::Timeout)),
-                Some(signal) => {
-                    return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
+            {
+                if let Some(signal) = raw_output.exit_status.signal() {
+                    if signal == TIMEOUT_CODE {
+                        timed_out = true;
+                    } else {
+                        return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
+                    }
                }
-                None => {}
            }

-            let exit_code = raw_output.exit_status.code().unwrap_or(-1);
-
-            if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
-                return Err(CodexErr::Sandbox(SandboxErr::Denied(
-                    exit_code,
-                    stdout.text,
-                    stderr.text,
-                )));
+            let mut exit_code = raw_output.exit_status.code().unwrap_or(-1);
+            if timed_out {
+                exit_code = EXEC_TIMEOUT_EXIT_CODE;
            }

-            Ok(ExecToolCallOutput {
+            let stdout = raw_output.stdout.from_utf8_lossy();
+            let stderr = raw_output.stderr.from_utf8_lossy();
+            let aggregated_output = raw_output.aggregated_output.from_utf8_lossy();
+            let exec_output = ExecToolCallOutput {
                exit_code,
                stdout,
                stderr,
-                aggregated_output: raw_output.aggregated_output.from_utf8_lossy(),
+                aggregated_output,
                duration,
-            })
+                timed_out,
+            };
+
+            if timed_out {
+                return Err(CodexErr::Sandbox(SandboxErr::Timeout {
+                    output: Box::new(exec_output),
+                }));
+            }
+
+            if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
+                return Err(CodexErr::Sandbox(SandboxErr::Denied {
+                    output: Box::new(exec_output),
+                }));
+            }
+
+            Ok(exec_output)
        }
        Err(err) => {
            tracing::error!("exec error: {err}");
@@ -197,6 +213,7 @@ struct RawExecToolCallOutput {
    pub stdout: StreamOutput<Vec<u8>>,
    pub stderr: StreamOutput<Vec<u8>>,
    pub aggregated_output: StreamOutput<Vec<u8>>,
+    pub timed_out: bool,
 }

 impl StreamOutput<String> {
@@ -229,6 +246,7 @@ pub struct ExecToolCallOutput {
    pub stderr: StreamOutput<String>,
    pub aggregated_output: StreamOutput<String>,
    pub duration: Duration,
+    pub timed_out: bool,
 }

 async fn exec(
@@ -298,22 +316,24 @@ async fn consume_truncated_output(
        Some(agg_tx.clone()),
    ));

-    let exit_status = tokio::select! {
+    let (exit_status, timed_out) = tokio::select! {
        result = tokio::time::timeout(timeout, child.wait()) => {
            match result {
-                Ok(Ok(exit_status)) => exit_status,
-                Ok(e) => e?,
+                Ok(status_result) => {
+                    let exit_status = status_result?;
+                    (exit_status, false)
+                }
                Err(_) => {
                    // timeout
                    child.start_kill()?;
                    // Debatable whether `child.wait().await` should be called here.
-                    synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE)
+                    (synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true)
                }
            }
        }
        _ = tokio::signal::ctrl_c() => {
            child.start_kill()?;
-            synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE)
+            (synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE), false)
        }
    };

@@ -336,6 +356,7 @@ async fn consume_truncated_output(
        stdout,
        stderr,
        aggregated_output,
+        timed_out,
    })
 }

--- a/codex-rs/core/tests/suite/exec_stream_events.rs
+++ b/codex-rs/core/tests/suite/exec_stream_events.rs
@@ -2,8 +2,11 @@

 use std::collections::HashMap;
 use std::path::PathBuf;
+use std::time::Duration;

 use async_channel::Receiver;
+use codex_core::error::CodexErr;
+use codex_core::error::SandboxErr;
 use codex_core::exec::ExecParams;
 use codex_core::exec::SandboxType;
 use codex_core::exec::StdoutStream;
@@ -170,3 +173,36 @@ async fn test_aggregated_output_interleaves_in_order() {
    assert_eq!(result.aggregated_output.text, "O1\nE1\nO2\nE2\n");
    assert_eq!(result.aggregated_output.truncated_after_lines, None);
 }
+
+#[tokio::test]
+async fn test_exec_timeout_returns_partial_output() {
+    let cmd = vec![
+        "/bin/sh".to_string(),
+        "-c".to_string(),
+        "printf 'before\\n'; sleep 2; printf 'after\\n'".to_string(),
+    ];
+
+    let params = ExecParams {
+        command: cmd,
+        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
+        timeout_ms: Some(200),
+        env: HashMap::new(),
+        with_escalated_permissions: None,
+        justification: None,
+    };
+
+    let policy = SandboxPolicy::new_read_only_policy();
+
+    let result = process_exec_tool_call(params, SandboxType::None, &policy, &None, None).await;
+
+    let Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) = result else {
+        panic!("expected timeout error");
+    };
+
+    assert_eq!(output.exit_code, 124);
+    assert_eq!(output.stdout.text, "before\n");
+    assert!(output.stderr.text.is_empty());
+    assert_eq!(output.aggregated_output.text, "before\n");
+    assert!(output.duration >= Duration::from_millis(200));
+    assert!(output.timed_out);
+}
--- a/codex-rs/linux-sandbox/tests/suite/landlock.rs
+++ b/codex-rs/linux-sandbox/tests/suite/landlock.rs
@@ -121,7 +121,7 @@ async fn test_writable_root() {
 }

 #[tokio::test]
-#[should_panic(expected = "Sandbox(Timeout)")]
+#[should_panic(expected = "Sandbox(Timeout")]
 async fn test_timeout() {
    run_cmd(&["sleep", "2"], &[], 50).await;
 }
@@ -156,26 +156,27 @@ async fn assert_network_blocked(cmd: &[&str]) {
    )
    .await;

-    let (exit_code, stdout, stderr) = match result {
-        Ok(output) => (output.exit_code, output.stdout.text, output.stderr.text),
-        Err(CodexErr::Sandbox(SandboxErr::Denied(exit_code, stdout, stderr))) => {
-            (exit_code, stdout, stderr)
-        }
+    let output = match result {
+        Ok(output) => output,
+        Err(CodexErr::Sandbox(SandboxErr::Denied { output })) => *output,
        _ => {
            panic!("expected sandbox denied error, got: {result:?}");
        }
    };

-    dbg!(&stderr);
-    dbg!(&stdout);
-    dbg!(&exit_code);
+    dbg!(&output.stderr.text);
+    dbg!(&output.stdout.text);
+    dbg!(&output.exit_code);

    // A completely missing binary exits with 127.  Anything else should also
    // be non‑zero (EPERM from seccomp will usually bubble up as 1, 2, 13…)
    // If—*and only if*—the command exits 0 we consider the sandbox breached.

-    if exit_code == 0 {
-        panic!("Network sandbox FAILED - {cmd:?} exited 0\nstdout:\n{stdout}\nstderr:\n{stderr}",);
+    if output.exit_code == 0 {
+        panic!(
+            "Network sandbox FAILED - {cmd:?} exited 0\nstdout:\n{}\nstderr:\n{}",
+            output.stdout.text, output.stderr.text
+        );
    }
 }