Include command output when sending timeout to model (#3576)

Being able to see the output helps the model decide how to handle the
timeout.
This commit is contained in:
pakrym-oai
2025-09-14 14:38:26 -07:00
committed by GitHub
parent 7e1543f5d8
commit 863d9c237e
5 changed files with 147 additions and 50 deletions

View File

@@ -845,6 +845,7 @@ impl Session {
aggregated_output,
duration,
exit_code,
timed_out: _,
} = output;
// Send full stdout/stderr to clients; do not truncate.
let stdout = stdout.text.clone();
@@ -920,6 +921,7 @@ impl Session {
let output_stderr;
let borrowed: &ExecToolCallOutput = match &result {
Ok(output) => output,
Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output,
Err(e) => {
output_stderr = ExecToolCallOutput {
exit_code: -1,
@@ -927,6 +929,7 @@ impl Session {
stderr: StreamOutput::new(get_error_message_ui(e)),
aggregated_output: StreamOutput::new(get_error_message_ui(e)),
duration: Duration::default(),
timed_out: false,
};
&output_stderr
}
@@ -2917,15 +2920,12 @@ async fn handle_sandbox_error(
let sub_id = exec_command_context.sub_id.clone();
let cwd = exec_command_context.cwd.clone();
// if the command timed out, we can simply return this failure to the model
if matches!(error, SandboxErr::Timeout) {
if let SandboxErr::Timeout { output } = &error {
let content = format_exec_output(output);
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!(
"command timed out after {} milliseconds",
params.timeout_duration().as_millis()
),
content,
success: Some(false),
},
};
@@ -3050,7 +3050,17 @@ fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.
let s = aggregated_output.text.as_str();
let mut s = &aggregated_output.text;
let prefixed_str: String;
if exec_output.timed_out {
prefixed_str = format!(
"command timed out after {} milliseconds\n",
exec_output.duration.as_millis()
) + s;
s = &prefixed_str;
}
let total_lines = s.lines().count();
if s.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
return s.to_string();
@@ -3093,6 +3103,7 @@ fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
// Build final string respecting byte budgets
let head_part = take_bytes_at_char_boundary(&head_lines_text, head_budget);
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len()));
result.push_str(head_part);
result.push_str(&marker);
@@ -3342,6 +3353,7 @@ mod tests {
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(full),
duration: StdDuration::from_secs(1),
timed_out: false,
};
let out = format_exec_output_str(&exec);
@@ -3384,6 +3396,7 @@ mod tests {
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new(full.clone()),
duration: StdDuration::from_secs(1),
timed_out: false,
};
let out = format_exec_output_str(&exec);
@@ -3406,6 +3419,25 @@ mod tests {
);
}
#[test]
fn includes_timed_out_message() {
let exec = ExecToolCallOutput {
exit_code: 0,
stdout: StreamOutput::new(String::new()),
stderr: StreamOutput::new(String::new()),
aggregated_output: StreamOutput::new("Command output".to_string()),
duration: StdDuration::from_secs(1),
timed_out: true,
};
let out = format_exec_output_str(&exec);
assert_eq!(
out,
"command timed out after 1000 milliseconds\nCommand output"
);
}
#[test]
fn falls_back_to_content_when_structured_is_null() {
let ctr = CallToolResult {

View File

@@ -1,3 +1,4 @@
use crate::exec::ExecToolCallOutput;
use crate::token_data::KnownPlan;
use crate::token_data::PlanType;
use codex_protocol::mcp_protocol::ConversationId;
@@ -13,8 +14,11 @@ pub type Result<T> = std::result::Result<T, CodexErr>;
#[derive(Error, Debug)]
pub enum SandboxErr {
/// Error from sandbox execution
#[error("sandbox denied exec error, exit code: {0}, stdout: {1}, stderr: {2}")]
Denied(i32, String, String),
#[error(
"sandbox denied exec error, exit code: {}, stdout: {}, stderr: {}",
.output.exit_code, .output.stdout.text, .output.stderr.text
)]
Denied { output: Box<ExecToolCallOutput> },
/// Error from linux seccomp filter setup
#[cfg(target_os = "linux")]
@@ -28,7 +32,7 @@ pub enum SandboxErr {
/// Command timed out
#[error("command timed out")]
Timeout,
Timeout { output: Box<ExecToolCallOutput> },
/// Command was killed by a signal
#[error("command was killed by a signal")]
@@ -245,9 +249,12 @@ impl CodexErr {
pub fn get_error_message_ui(e: &CodexErr) -> String {
match e {
CodexErr::Sandbox(SandboxErr::Denied(_, _, stderr)) => stderr.to_string(),
CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(),
// Timeouts are not sandbox errors from a UX perspective; present them plainly
CodexErr::Sandbox(SandboxErr::Timeout) => "error: command timed out".to_string(),
CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!(
"error: command timed out after {} ms",
output.duration.as_millis()
),
_ => e.to_string(),
}
}

View File

@@ -34,6 +34,7 @@ const DEFAULT_TIMEOUT_MS: u64 = 10_000;
const SIGKILL_CODE: i32 = 9;
const TIMEOUT_CODE: i32 = 64;
const EXIT_CODE_SIGNAL_BASE: i32 = 128; // conventional shell: 128 + signal
const EXEC_TIMEOUT_EXIT_CODE: i32 = 124; // conventional timeout exit code
// I/O buffer sizing
const READ_CHUNK_SIZE: usize = 8192; // bytes per read
@@ -86,11 +87,12 @@ pub async fn process_exec_tool_call(
) -> Result<ExecToolCallOutput> {
let start = Instant::now();
let timeout_duration = params.timeout_duration();
let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type
{
SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await,
SandboxType::MacosSeatbelt => {
let timeout = params.timeout_duration();
let ExecParams {
command, cwd, env, ..
} = params;
@@ -102,10 +104,9 @@ pub async fn process_exec_tool_call(
env,
)
.await?;
consume_truncated_output(child, timeout, stdout_stream.clone()).await
consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await
}
SandboxType::LinuxSeccomp => {
let timeout = params.timeout_duration();
let ExecParams {
command, cwd, env, ..
} = params;
@@ -123,41 +124,56 @@ pub async fn process_exec_tool_call(
)
.await?;
consume_truncated_output(child, timeout, stdout_stream).await
consume_truncated_output(child, timeout_duration, stdout_stream).await
}
};
let duration = start.elapsed();
match raw_output_result {
Ok(raw_output) => {
let stdout = raw_output.stdout.from_utf8_lossy();
let stderr = raw_output.stderr.from_utf8_lossy();
#[allow(unused_mut)]
let mut timed_out = raw_output.timed_out;
#[cfg(target_family = "unix")]
match raw_output.exit_status.signal() {
Some(TIMEOUT_CODE) => return Err(CodexErr::Sandbox(SandboxErr::Timeout)),
Some(signal) => {
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
{
if let Some(signal) = raw_output.exit_status.signal() {
if signal == TIMEOUT_CODE {
timed_out = true;
} else {
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
}
}
None => {}
}
let exit_code = raw_output.exit_status.code().unwrap_or(-1);
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
return Err(CodexErr::Sandbox(SandboxErr::Denied(
exit_code,
stdout.text,
stderr.text,
)));
let mut exit_code = raw_output.exit_status.code().unwrap_or(-1);
if timed_out {
exit_code = EXEC_TIMEOUT_EXIT_CODE;
}
Ok(ExecToolCallOutput {
let stdout = raw_output.stdout.from_utf8_lossy();
let stderr = raw_output.stderr.from_utf8_lossy();
let aggregated_output = raw_output.aggregated_output.from_utf8_lossy();
let exec_output = ExecToolCallOutput {
exit_code,
stdout,
stderr,
aggregated_output: raw_output.aggregated_output.from_utf8_lossy(),
aggregated_output,
duration,
})
timed_out,
};
if timed_out {
return Err(CodexErr::Sandbox(SandboxErr::Timeout {
output: Box::new(exec_output),
}));
}
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
return Err(CodexErr::Sandbox(SandboxErr::Denied {
output: Box::new(exec_output),
}));
}
Ok(exec_output)
}
Err(err) => {
tracing::error!("exec error: {err}");
@@ -197,6 +213,7 @@ struct RawExecToolCallOutput {
pub stdout: StreamOutput<Vec<u8>>,
pub stderr: StreamOutput<Vec<u8>>,
pub aggregated_output: StreamOutput<Vec<u8>>,
pub timed_out: bool,
}
impl StreamOutput<String> {
@@ -229,6 +246,7 @@ pub struct ExecToolCallOutput {
pub stderr: StreamOutput<String>,
pub aggregated_output: StreamOutput<String>,
pub duration: Duration,
pub timed_out: bool,
}
async fn exec(
@@ -298,22 +316,24 @@ async fn consume_truncated_output(
Some(agg_tx.clone()),
));
let exit_status = tokio::select! {
let (exit_status, timed_out) = tokio::select! {
result = tokio::time::timeout(timeout, child.wait()) => {
match result {
Ok(Ok(exit_status)) => exit_status,
Ok(e) => e?,
Ok(status_result) => {
let exit_status = status_result?;
(exit_status, false)
}
Err(_) => {
// timeout
child.start_kill()?;
// Debatable whether `child.wait().await` should be called here.
synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE)
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true)
}
}
}
_ = tokio::signal::ctrl_c() => {
child.start_kill()?;
synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE)
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE), false)
}
};
@@ -336,6 +356,7 @@ async fn consume_truncated_output(
stdout,
stderr,
aggregated_output,
timed_out,
})
}

View File

@@ -2,8 +2,11 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;
use async_channel::Receiver;
use codex_core::error::CodexErr;
use codex_core::error::SandboxErr;
use codex_core::exec::ExecParams;
use codex_core::exec::SandboxType;
use codex_core::exec::StdoutStream;
@@ -170,3 +173,36 @@ async fn test_aggregated_output_interleaves_in_order() {
assert_eq!(result.aggregated_output.text, "O1\nE1\nO2\nE2\n");
assert_eq!(result.aggregated_output.truncated_after_lines, None);
}
#[tokio::test]
async fn test_exec_timeout_returns_partial_output() {
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
"printf 'before\\n'; sleep 2; printf 'after\\n'".to_string(),
];
let params = ExecParams {
command: cmd,
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
timeout_ms: Some(200),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(params, SandboxType::None, &policy, &None, None).await;
let Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) = result else {
panic!("expected timeout error");
};
assert_eq!(output.exit_code, 124);
assert_eq!(output.stdout.text, "before\n");
assert!(output.stderr.text.is_empty());
assert_eq!(output.aggregated_output.text, "before\n");
assert!(output.duration >= Duration::from_millis(200));
assert!(output.timed_out);
}

View File

@@ -121,7 +121,7 @@ async fn test_writable_root() {
}
#[tokio::test]
#[should_panic(expected = "Sandbox(Timeout)")]
#[should_panic(expected = "Sandbox(Timeout")]
async fn test_timeout() {
run_cmd(&["sleep", "2"], &[], 50).await;
}
@@ -156,26 +156,27 @@ async fn assert_network_blocked(cmd: &[&str]) {
)
.await;
let (exit_code, stdout, stderr) = match result {
Ok(output) => (output.exit_code, output.stdout.text, output.stderr.text),
Err(CodexErr::Sandbox(SandboxErr::Denied(exit_code, stdout, stderr))) => {
(exit_code, stdout, stderr)
}
let output = match result {
Ok(output) => output,
Err(CodexErr::Sandbox(SandboxErr::Denied { output })) => *output,
_ => {
panic!("expected sandbox denied error, got: {result:?}");
}
};
dbg!(&stderr);
dbg!(&stdout);
dbg!(&exit_code);
dbg!(&output.stderr.text);
dbg!(&output.stdout.text);
dbg!(&output.exit_code);
// A completely missing binary exits with 127. Anything else should also
// be nonzero (EPERM from seccomp will usually bubble up as 1, 2, 13…)
// If—*and only if*—the command exits 0 we consider the sandbox breached.
if exit_code == 0 {
panic!("Network sandbox FAILED - {cmd:?} exited 0\nstdout:\n{stdout}\nstderr:\n{stderr}",);
if output.exit_code == 0 {
panic!(
"Network sandbox FAILED - {cmd:?} exited 0\nstdout:\n{}\nstderr:\n{}",
output.stdout.text, output.stderr.text
);
}
}