diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 4745c4da..7177348f 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -845,6 +845,7 @@ impl Session { aggregated_output, duration, exit_code, + timed_out: _, } = output; // Send full stdout/stderr to clients; do not truncate. let stdout = stdout.text.clone(); @@ -920,6 +921,7 @@ impl Session { let output_stderr; let borrowed: &ExecToolCallOutput = match &result { Ok(output) => output, + Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output, Err(e) => { output_stderr = ExecToolCallOutput { exit_code: -1, @@ -927,6 +929,7 @@ impl Session { stderr: StreamOutput::new(get_error_message_ui(e)), aggregated_output: StreamOutput::new(get_error_message_ui(e)), duration: Duration::default(), + timed_out: false, }; &output_stderr } @@ -2917,15 +2920,12 @@ async fn handle_sandbox_error( let sub_id = exec_command_context.sub_id.clone(); let cwd = exec_command_context.cwd.clone(); - // if the command timed out, we can simply return this failure to the model - if matches!(error, SandboxErr::Timeout) { + if let SandboxErr::Timeout { output } = &error { + let content = format_exec_output(output); return ResponseInputItem::FunctionCallOutput { call_id, output: FunctionCallOutputPayload { - content: format!( - "command timed out after {} milliseconds", - params.timeout_duration().as_millis() - ), + content, success: Some(false), }, }; @@ -3050,7 +3050,17 @@ fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { // Head+tail truncation for the model: show the beginning and end with an elision. // Clients still receive full streams; only this formatted summary is capped. - let s = aggregated_output.text.as_str(); + let mut s = &aggregated_output.text; + let prefixed_str: String; + + if exec_output.timed_out { + prefixed_str = format!( + "command timed out after {} milliseconds\n", + exec_output.duration.as_millis() + ) + s; + s = &prefixed_str; + } + let total_lines = s.lines().count(); if s.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES { return s.to_string(); @@ -3093,6 +3103,7 @@ fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { // Build final string respecting byte budgets let head_part = take_bytes_at_char_boundary(&head_lines_text, head_budget); let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len())); + result.push_str(head_part); result.push_str(&marker); @@ -3342,6 +3353,7 @@ mod tests { stderr: StreamOutput::new(String::new()), aggregated_output: StreamOutput::new(full), duration: StdDuration::from_secs(1), + timed_out: false, }; let out = format_exec_output_str(&exec); @@ -3384,6 +3396,7 @@ mod tests { stderr: StreamOutput::new(String::new()), aggregated_output: StreamOutput::new(full.clone()), duration: StdDuration::from_secs(1), + timed_out: false, }; let out = format_exec_output_str(&exec); @@ -3406,6 +3419,25 @@ mod tests { ); } + #[test] + fn includes_timed_out_message() { + let exec = ExecToolCallOutput { + exit_code: 0, + stdout: StreamOutput::new(String::new()), + stderr: StreamOutput::new(String::new()), + aggregated_output: StreamOutput::new("Command output".to_string()), + duration: StdDuration::from_secs(1), + timed_out: true, + }; + + let out = format_exec_output_str(&exec); + + assert_eq!( + out, + "command timed out after 1000 milliseconds\nCommand output" + ); + } + #[test] fn falls_back_to_content_when_structured_is_null() { let ctr = CallToolResult { diff --git a/codex-rs/core/src/error.rs b/codex-rs/core/src/error.rs index b1cda024..77b447e5 100644 --- a/codex-rs/core/src/error.rs +++ b/codex-rs/core/src/error.rs @@ -1,3 +1,4 @@ +use crate::exec::ExecToolCallOutput; use crate::token_data::KnownPlan; use crate::token_data::PlanType; use codex_protocol::mcp_protocol::ConversationId; @@ -13,8 +14,11 @@ pub type Result = std::result::Result; #[derive(Error, Debug)] pub enum SandboxErr { /// Error from sandbox execution - #[error("sandbox denied exec error, exit code: {0}, stdout: {1}, stderr: {2}")] - Denied(i32, String, String), + #[error( + "sandbox denied exec error, exit code: {}, stdout: {}, stderr: {}", + .output.exit_code, .output.stdout.text, .output.stderr.text + )] + Denied { output: Box }, /// Error from linux seccomp filter setup #[cfg(target_os = "linux")] @@ -28,7 +32,7 @@ pub enum SandboxErr { /// Command timed out #[error("command timed out")] - Timeout, + Timeout { output: Box }, /// Command was killed by a signal #[error("command was killed by a signal")] @@ -245,9 +249,12 @@ impl CodexErr { pub fn get_error_message_ui(e: &CodexErr) -> String { match e { - CodexErr::Sandbox(SandboxErr::Denied(_, _, stderr)) => stderr.to_string(), + CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(), // Timeouts are not sandbox errors from a UX perspective; present them plainly - CodexErr::Sandbox(SandboxErr::Timeout) => "error: command timed out".to_string(), + CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!( + "error: command timed out after {} ms", + output.duration.as_millis() + ), _ => e.to_string(), } } diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 3b407667..8fecb16f 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -34,6 +34,7 @@ const DEFAULT_TIMEOUT_MS: u64 = 10_000; const SIGKILL_CODE: i32 = 9; const TIMEOUT_CODE: i32 = 64; const EXIT_CODE_SIGNAL_BASE: i32 = 128; // conventional shell: 128 + signal +const EXEC_TIMEOUT_EXIT_CODE: i32 = 124; // conventional timeout exit code // I/O buffer sizing const READ_CHUNK_SIZE: usize = 8192; // bytes per read @@ -86,11 +87,12 @@ pub async fn process_exec_tool_call( ) -> Result { let start = Instant::now(); + let timeout_duration = params.timeout_duration(); + let raw_output_result: std::result::Result = match sandbox_type { SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await, SandboxType::MacosSeatbelt => { - let timeout = params.timeout_duration(); let ExecParams { command, cwd, env, .. } = params; @@ -102,10 +104,9 @@ pub async fn process_exec_tool_call( env, ) .await?; - consume_truncated_output(child, timeout, stdout_stream.clone()).await + consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await } SandboxType::LinuxSeccomp => { - let timeout = params.timeout_duration(); let ExecParams { command, cwd, env, .. } = params; @@ -123,41 +124,56 @@ pub async fn process_exec_tool_call( ) .await?; - consume_truncated_output(child, timeout, stdout_stream).await + consume_truncated_output(child, timeout_duration, stdout_stream).await } }; let duration = start.elapsed(); match raw_output_result { Ok(raw_output) => { - let stdout = raw_output.stdout.from_utf8_lossy(); - let stderr = raw_output.stderr.from_utf8_lossy(); + #[allow(unused_mut)] + let mut timed_out = raw_output.timed_out; #[cfg(target_family = "unix")] - match raw_output.exit_status.signal() { - Some(TIMEOUT_CODE) => return Err(CodexErr::Sandbox(SandboxErr::Timeout)), - Some(signal) => { - return Err(CodexErr::Sandbox(SandboxErr::Signal(signal))); + { + if let Some(signal) = raw_output.exit_status.signal() { + if signal == TIMEOUT_CODE { + timed_out = true; + } else { + return Err(CodexErr::Sandbox(SandboxErr::Signal(signal))); + } } - None => {} } - let exit_code = raw_output.exit_status.code().unwrap_or(-1); - - if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) { - return Err(CodexErr::Sandbox(SandboxErr::Denied( - exit_code, - stdout.text, - stderr.text, - ))); + let mut exit_code = raw_output.exit_status.code().unwrap_or(-1); + if timed_out { + exit_code = EXEC_TIMEOUT_EXIT_CODE; } - Ok(ExecToolCallOutput { + let stdout = raw_output.stdout.from_utf8_lossy(); + let stderr = raw_output.stderr.from_utf8_lossy(); + let aggregated_output = raw_output.aggregated_output.from_utf8_lossy(); + let exec_output = ExecToolCallOutput { exit_code, stdout, stderr, - aggregated_output: raw_output.aggregated_output.from_utf8_lossy(), + aggregated_output, duration, - }) + timed_out, + }; + + if timed_out { + return Err(CodexErr::Sandbox(SandboxErr::Timeout { + output: Box::new(exec_output), + })); + } + + if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) { + return Err(CodexErr::Sandbox(SandboxErr::Denied { + output: Box::new(exec_output), + })); + } + + Ok(exec_output) } Err(err) => { tracing::error!("exec error: {err}"); @@ -197,6 +213,7 @@ struct RawExecToolCallOutput { pub stdout: StreamOutput>, pub stderr: StreamOutput>, pub aggregated_output: StreamOutput>, + pub timed_out: bool, } impl StreamOutput { @@ -229,6 +246,7 @@ pub struct ExecToolCallOutput { pub stderr: StreamOutput, pub aggregated_output: StreamOutput, pub duration: Duration, + pub timed_out: bool, } async fn exec( @@ -298,22 +316,24 @@ async fn consume_truncated_output( Some(agg_tx.clone()), )); - let exit_status = tokio::select! { + let (exit_status, timed_out) = tokio::select! { result = tokio::time::timeout(timeout, child.wait()) => { match result { - Ok(Ok(exit_status)) => exit_status, - Ok(e) => e?, + Ok(status_result) => { + let exit_status = status_result?; + (exit_status, false) + } Err(_) => { // timeout child.start_kill()?; // Debatable whether `child.wait().await` should be called here. - synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE) + (synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true) } } } _ = tokio::signal::ctrl_c() => { child.start_kill()?; - synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE) + (synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE), false) } }; @@ -336,6 +356,7 @@ async fn consume_truncated_output( stdout, stderr, aggregated_output, + timed_out, }) } diff --git a/codex-rs/core/tests/suite/exec_stream_events.rs b/codex-rs/core/tests/suite/exec_stream_events.rs index 521823d2..0b3edf2e 100644 --- a/codex-rs/core/tests/suite/exec_stream_events.rs +++ b/codex-rs/core/tests/suite/exec_stream_events.rs @@ -2,8 +2,11 @@ use std::collections::HashMap; use std::path::PathBuf; +use std::time::Duration; use async_channel::Receiver; +use codex_core::error::CodexErr; +use codex_core::error::SandboxErr; use codex_core::exec::ExecParams; use codex_core::exec::SandboxType; use codex_core::exec::StdoutStream; @@ -170,3 +173,36 @@ async fn test_aggregated_output_interleaves_in_order() { assert_eq!(result.aggregated_output.text, "O1\nE1\nO2\nE2\n"); assert_eq!(result.aggregated_output.truncated_after_lines, None); } + +#[tokio::test] +async fn test_exec_timeout_returns_partial_output() { + let cmd = vec![ + "/bin/sh".to_string(), + "-c".to_string(), + "printf 'before\\n'; sleep 2; printf 'after\\n'".to_string(), + ]; + + let params = ExecParams { + command: cmd, + cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")), + timeout_ms: Some(200), + env: HashMap::new(), + with_escalated_permissions: None, + justification: None, + }; + + let policy = SandboxPolicy::new_read_only_policy(); + + let result = process_exec_tool_call(params, SandboxType::None, &policy, &None, None).await; + + let Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) = result else { + panic!("expected timeout error"); + }; + + assert_eq!(output.exit_code, 124); + assert_eq!(output.stdout.text, "before\n"); + assert!(output.stderr.text.is_empty()); + assert_eq!(output.aggregated_output.text, "before\n"); + assert!(output.duration >= Duration::from_millis(200)); + assert!(output.timed_out); +} diff --git a/codex-rs/linux-sandbox/tests/suite/landlock.rs b/codex-rs/linux-sandbox/tests/suite/landlock.rs index 8faa02d1..fddb43c3 100644 --- a/codex-rs/linux-sandbox/tests/suite/landlock.rs +++ b/codex-rs/linux-sandbox/tests/suite/landlock.rs @@ -121,7 +121,7 @@ async fn test_writable_root() { } #[tokio::test] -#[should_panic(expected = "Sandbox(Timeout)")] +#[should_panic(expected = "Sandbox(Timeout")] async fn test_timeout() { run_cmd(&["sleep", "2"], &[], 50).await; } @@ -156,26 +156,27 @@ async fn assert_network_blocked(cmd: &[&str]) { ) .await; - let (exit_code, stdout, stderr) = match result { - Ok(output) => (output.exit_code, output.stdout.text, output.stderr.text), - Err(CodexErr::Sandbox(SandboxErr::Denied(exit_code, stdout, stderr))) => { - (exit_code, stdout, stderr) - } + let output = match result { + Ok(output) => output, + Err(CodexErr::Sandbox(SandboxErr::Denied { output })) => *output, _ => { panic!("expected sandbox denied error, got: {result:?}"); } }; - dbg!(&stderr); - dbg!(&stdout); - dbg!(&exit_code); + dbg!(&output.stderr.text); + dbg!(&output.stdout.text); + dbg!(&output.exit_code); // A completely missing binary exits with 127. Anything else should also // be non‑zero (EPERM from seccomp will usually bubble up as 1, 2, 13…) // If—*and only if*—the command exits 0 we consider the sandbox breached. - if exit_code == 0 { - panic!("Network sandbox FAILED - {cmd:?} exited 0\nstdout:\n{stdout}\nstderr:\n{stderr}",); + if output.exit_code == 0 { + panic!( + "Network sandbox FAILED - {cmd:?} exited 0\nstdout:\n{}\nstderr:\n{}", + output.stdout.text, output.stderr.text + ); } }