Include command output when sending timeout to model (#3576)
Being able to see the output helps the model decide how to handle the timeout.
This commit is contained in:
@@ -845,6 +845,7 @@ impl Session {
|
||||
aggregated_output,
|
||||
duration,
|
||||
exit_code,
|
||||
timed_out: _,
|
||||
} = output;
|
||||
// Send full stdout/stderr to clients; do not truncate.
|
||||
let stdout = stdout.text.clone();
|
||||
@@ -920,6 +921,7 @@ impl Session {
|
||||
let output_stderr;
|
||||
let borrowed: &ExecToolCallOutput = match &result {
|
||||
Ok(output) => output,
|
||||
Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output,
|
||||
Err(e) => {
|
||||
output_stderr = ExecToolCallOutput {
|
||||
exit_code: -1,
|
||||
@@ -927,6 +929,7 @@ impl Session {
|
||||
stderr: StreamOutput::new(get_error_message_ui(e)),
|
||||
aggregated_output: StreamOutput::new(get_error_message_ui(e)),
|
||||
duration: Duration::default(),
|
||||
timed_out: false,
|
||||
};
|
||||
&output_stderr
|
||||
}
|
||||
@@ -2917,15 +2920,12 @@ async fn handle_sandbox_error(
|
||||
let sub_id = exec_command_context.sub_id.clone();
|
||||
let cwd = exec_command_context.cwd.clone();
|
||||
|
||||
// if the command timed out, we can simply return this failure to the model
|
||||
if matches!(error, SandboxErr::Timeout) {
|
||||
if let SandboxErr::Timeout { output } = &error {
|
||||
let content = format_exec_output(output);
|
||||
return ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!(
|
||||
"command timed out after {} milliseconds",
|
||||
params.timeout_duration().as_millis()
|
||||
),
|
||||
content,
|
||||
success: Some(false),
|
||||
},
|
||||
};
|
||||
@@ -3050,7 +3050,17 @@ fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
||||
// Head+tail truncation for the model: show the beginning and end with an elision.
|
||||
// Clients still receive full streams; only this formatted summary is capped.
|
||||
|
||||
let s = aggregated_output.text.as_str();
|
||||
let mut s = &aggregated_output.text;
|
||||
let prefixed_str: String;
|
||||
|
||||
if exec_output.timed_out {
|
||||
prefixed_str = format!(
|
||||
"command timed out after {} milliseconds\n",
|
||||
exec_output.duration.as_millis()
|
||||
) + s;
|
||||
s = &prefixed_str;
|
||||
}
|
||||
|
||||
let total_lines = s.lines().count();
|
||||
if s.len() <= MODEL_FORMAT_MAX_BYTES && total_lines <= MODEL_FORMAT_MAX_LINES {
|
||||
return s.to_string();
|
||||
@@ -3093,6 +3103,7 @@ fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
||||
// Build final string respecting byte budgets
|
||||
let head_part = take_bytes_at_char_boundary(&head_lines_text, head_budget);
|
||||
let mut result = String::with_capacity(MODEL_FORMAT_MAX_BYTES.min(s.len()));
|
||||
|
||||
result.push_str(head_part);
|
||||
result.push_str(&marker);
|
||||
|
||||
@@ -3342,6 +3353,7 @@ mod tests {
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(full),
|
||||
duration: StdDuration::from_secs(1),
|
||||
timed_out: false,
|
||||
};
|
||||
|
||||
let out = format_exec_output_str(&exec);
|
||||
@@ -3384,6 +3396,7 @@ mod tests {
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(full.clone()),
|
||||
duration: StdDuration::from_secs(1),
|
||||
timed_out: false,
|
||||
};
|
||||
|
||||
let out = format_exec_output_str(&exec);
|
||||
@@ -3406,6 +3419,25 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn includes_timed_out_message() {
|
||||
let exec = ExecToolCallOutput {
|
||||
exit_code: 0,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new("Command output".to_string()),
|
||||
duration: StdDuration::from_secs(1),
|
||||
timed_out: true,
|
||||
};
|
||||
|
||||
let out = format_exec_output_str(&exec);
|
||||
|
||||
assert_eq!(
|
||||
out,
|
||||
"command timed out after 1000 milliseconds\nCommand output"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn falls_back_to_content_when_structured_is_null() {
|
||||
let ctr = CallToolResult {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::token_data::KnownPlan;
|
||||
use crate::token_data::PlanType;
|
||||
use codex_protocol::mcp_protocol::ConversationId;
|
||||
@@ -13,8 +14,11 @@ pub type Result<T> = std::result::Result<T, CodexErr>;
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SandboxErr {
|
||||
/// Error from sandbox execution
|
||||
#[error("sandbox denied exec error, exit code: {0}, stdout: {1}, stderr: {2}")]
|
||||
Denied(i32, String, String),
|
||||
#[error(
|
||||
"sandbox denied exec error, exit code: {}, stdout: {}, stderr: {}",
|
||||
.output.exit_code, .output.stdout.text, .output.stderr.text
|
||||
)]
|
||||
Denied { output: Box<ExecToolCallOutput> },
|
||||
|
||||
/// Error from linux seccomp filter setup
|
||||
#[cfg(target_os = "linux")]
|
||||
@@ -28,7 +32,7 @@ pub enum SandboxErr {
|
||||
|
||||
/// Command timed out
|
||||
#[error("command timed out")]
|
||||
Timeout,
|
||||
Timeout { output: Box<ExecToolCallOutput> },
|
||||
|
||||
/// Command was killed by a signal
|
||||
#[error("command was killed by a signal")]
|
||||
@@ -245,9 +249,12 @@ impl CodexErr {
|
||||
|
||||
pub fn get_error_message_ui(e: &CodexErr) -> String {
|
||||
match e {
|
||||
CodexErr::Sandbox(SandboxErr::Denied(_, _, stderr)) => stderr.to_string(),
|
||||
CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(),
|
||||
// Timeouts are not sandbox errors from a UX perspective; present them plainly
|
||||
CodexErr::Sandbox(SandboxErr::Timeout) => "error: command timed out".to_string(),
|
||||
CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!(
|
||||
"error: command timed out after {} ms",
|
||||
output.duration.as_millis()
|
||||
),
|
||||
_ => e.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ const DEFAULT_TIMEOUT_MS: u64 = 10_000;
|
||||
const SIGKILL_CODE: i32 = 9;
|
||||
const TIMEOUT_CODE: i32 = 64;
|
||||
const EXIT_CODE_SIGNAL_BASE: i32 = 128; // conventional shell: 128 + signal
|
||||
const EXEC_TIMEOUT_EXIT_CODE: i32 = 124; // conventional timeout exit code
|
||||
|
||||
// I/O buffer sizing
|
||||
const READ_CHUNK_SIZE: usize = 8192; // bytes per read
|
||||
@@ -86,11 +87,12 @@ pub async fn process_exec_tool_call(
|
||||
) -> Result<ExecToolCallOutput> {
|
||||
let start = Instant::now();
|
||||
|
||||
let timeout_duration = params.timeout_duration();
|
||||
|
||||
let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type
|
||||
{
|
||||
SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await,
|
||||
SandboxType::MacosSeatbelt => {
|
||||
let timeout = params.timeout_duration();
|
||||
let ExecParams {
|
||||
command, cwd, env, ..
|
||||
} = params;
|
||||
@@ -102,10 +104,9 @@ pub async fn process_exec_tool_call(
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
consume_truncated_output(child, timeout, stdout_stream.clone()).await
|
||||
consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await
|
||||
}
|
||||
SandboxType::LinuxSeccomp => {
|
||||
let timeout = params.timeout_duration();
|
||||
let ExecParams {
|
||||
command, cwd, env, ..
|
||||
} = params;
|
||||
@@ -123,41 +124,56 @@ pub async fn process_exec_tool_call(
|
||||
)
|
||||
.await?;
|
||||
|
||||
consume_truncated_output(child, timeout, stdout_stream).await
|
||||
consume_truncated_output(child, timeout_duration, stdout_stream).await
|
||||
}
|
||||
};
|
||||
let duration = start.elapsed();
|
||||
match raw_output_result {
|
||||
Ok(raw_output) => {
|
||||
let stdout = raw_output.stdout.from_utf8_lossy();
|
||||
let stderr = raw_output.stderr.from_utf8_lossy();
|
||||
#[allow(unused_mut)]
|
||||
let mut timed_out = raw_output.timed_out;
|
||||
|
||||
#[cfg(target_family = "unix")]
|
||||
match raw_output.exit_status.signal() {
|
||||
Some(TIMEOUT_CODE) => return Err(CodexErr::Sandbox(SandboxErr::Timeout)),
|
||||
Some(signal) => {
|
||||
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
|
||||
{
|
||||
if let Some(signal) = raw_output.exit_status.signal() {
|
||||
if signal == TIMEOUT_CODE {
|
||||
timed_out = true;
|
||||
} else {
|
||||
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
|
||||
}
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
|
||||
let exit_code = raw_output.exit_status.code().unwrap_or(-1);
|
||||
|
||||
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
|
||||
return Err(CodexErr::Sandbox(SandboxErr::Denied(
|
||||
exit_code,
|
||||
stdout.text,
|
||||
stderr.text,
|
||||
)));
|
||||
let mut exit_code = raw_output.exit_status.code().unwrap_or(-1);
|
||||
if timed_out {
|
||||
exit_code = EXEC_TIMEOUT_EXIT_CODE;
|
||||
}
|
||||
|
||||
Ok(ExecToolCallOutput {
|
||||
let stdout = raw_output.stdout.from_utf8_lossy();
|
||||
let stderr = raw_output.stderr.from_utf8_lossy();
|
||||
let aggregated_output = raw_output.aggregated_output.from_utf8_lossy();
|
||||
let exec_output = ExecToolCallOutput {
|
||||
exit_code,
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output: raw_output.aggregated_output.from_utf8_lossy(),
|
||||
aggregated_output,
|
||||
duration,
|
||||
})
|
||||
timed_out,
|
||||
};
|
||||
|
||||
if timed_out {
|
||||
return Err(CodexErr::Sandbox(SandboxErr::Timeout {
|
||||
output: Box::new(exec_output),
|
||||
}));
|
||||
}
|
||||
|
||||
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
|
||||
return Err(CodexErr::Sandbox(SandboxErr::Denied {
|
||||
output: Box::new(exec_output),
|
||||
}));
|
||||
}
|
||||
|
||||
Ok(exec_output)
|
||||
}
|
||||
Err(err) => {
|
||||
tracing::error!("exec error: {err}");
|
||||
@@ -197,6 +213,7 @@ struct RawExecToolCallOutput {
|
||||
pub stdout: StreamOutput<Vec<u8>>,
|
||||
pub stderr: StreamOutput<Vec<u8>>,
|
||||
pub aggregated_output: StreamOutput<Vec<u8>>,
|
||||
pub timed_out: bool,
|
||||
}
|
||||
|
||||
impl StreamOutput<String> {
|
||||
@@ -229,6 +246,7 @@ pub struct ExecToolCallOutput {
|
||||
pub stderr: StreamOutput<String>,
|
||||
pub aggregated_output: StreamOutput<String>,
|
||||
pub duration: Duration,
|
||||
pub timed_out: bool,
|
||||
}
|
||||
|
||||
async fn exec(
|
||||
@@ -298,22 +316,24 @@ async fn consume_truncated_output(
|
||||
Some(agg_tx.clone()),
|
||||
));
|
||||
|
||||
let exit_status = tokio::select! {
|
||||
let (exit_status, timed_out) = tokio::select! {
|
||||
result = tokio::time::timeout(timeout, child.wait()) => {
|
||||
match result {
|
||||
Ok(Ok(exit_status)) => exit_status,
|
||||
Ok(e) => e?,
|
||||
Ok(status_result) => {
|
||||
let exit_status = status_result?;
|
||||
(exit_status, false)
|
||||
}
|
||||
Err(_) => {
|
||||
// timeout
|
||||
child.start_kill()?;
|
||||
// Debatable whether `child.wait().await` should be called here.
|
||||
synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE)
|
||||
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true)
|
||||
}
|
||||
}
|
||||
}
|
||||
_ = tokio::signal::ctrl_c() => {
|
||||
child.start_kill()?;
|
||||
synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE)
|
||||
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + SIGKILL_CODE), false)
|
||||
}
|
||||
};
|
||||
|
||||
@@ -336,6 +356,7 @@ async fn consume_truncated_output(
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output,
|
||||
timed_out,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -2,8 +2,11 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use async_channel::Receiver;
|
||||
use codex_core::error::CodexErr;
|
||||
use codex_core::error::SandboxErr;
|
||||
use codex_core::exec::ExecParams;
|
||||
use codex_core::exec::SandboxType;
|
||||
use codex_core::exec::StdoutStream;
|
||||
@@ -170,3 +173,36 @@ async fn test_aggregated_output_interleaves_in_order() {
|
||||
assert_eq!(result.aggregated_output.text, "O1\nE1\nO2\nE2\n");
|
||||
assert_eq!(result.aggregated_output.truncated_after_lines, None);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_exec_timeout_returns_partial_output() {
|
||||
let cmd = vec![
|
||||
"/bin/sh".to_string(),
|
||||
"-c".to_string(),
|
||||
"printf 'before\\n'; sleep 2; printf 'after\\n'".to_string(),
|
||||
];
|
||||
|
||||
let params = ExecParams {
|
||||
command: cmd,
|
||||
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
|
||||
timeout_ms: Some(200),
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
};
|
||||
|
||||
let policy = SandboxPolicy::new_read_only_policy();
|
||||
|
||||
let result = process_exec_tool_call(params, SandboxType::None, &policy, &None, None).await;
|
||||
|
||||
let Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) = result else {
|
||||
panic!("expected timeout error");
|
||||
};
|
||||
|
||||
assert_eq!(output.exit_code, 124);
|
||||
assert_eq!(output.stdout.text, "before\n");
|
||||
assert!(output.stderr.text.is_empty());
|
||||
assert_eq!(output.aggregated_output.text, "before\n");
|
||||
assert!(output.duration >= Duration::from_millis(200));
|
||||
assert!(output.timed_out);
|
||||
}
|
||||
|
||||
@@ -121,7 +121,7 @@ async fn test_writable_root() {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[should_panic(expected = "Sandbox(Timeout)")]
|
||||
#[should_panic(expected = "Sandbox(Timeout")]
|
||||
async fn test_timeout() {
|
||||
run_cmd(&["sleep", "2"], &[], 50).await;
|
||||
}
|
||||
@@ -156,26 +156,27 @@ async fn assert_network_blocked(cmd: &[&str]) {
|
||||
)
|
||||
.await;
|
||||
|
||||
let (exit_code, stdout, stderr) = match result {
|
||||
Ok(output) => (output.exit_code, output.stdout.text, output.stderr.text),
|
||||
Err(CodexErr::Sandbox(SandboxErr::Denied(exit_code, stdout, stderr))) => {
|
||||
(exit_code, stdout, stderr)
|
||||
}
|
||||
let output = match result {
|
||||
Ok(output) => output,
|
||||
Err(CodexErr::Sandbox(SandboxErr::Denied { output })) => *output,
|
||||
_ => {
|
||||
panic!("expected sandbox denied error, got: {result:?}");
|
||||
}
|
||||
};
|
||||
|
||||
dbg!(&stderr);
|
||||
dbg!(&stdout);
|
||||
dbg!(&exit_code);
|
||||
dbg!(&output.stderr.text);
|
||||
dbg!(&output.stdout.text);
|
||||
dbg!(&output.exit_code);
|
||||
|
||||
// A completely missing binary exits with 127. Anything else should also
|
||||
// be non‑zero (EPERM from seccomp will usually bubble up as 1, 2, 13…)
|
||||
// If—*and only if*—the command exits 0 we consider the sandbox breached.
|
||||
|
||||
if exit_code == 0 {
|
||||
panic!("Network sandbox FAILED - {cmd:?} exited 0\nstdout:\n{stdout}\nstderr:\n{stderr}",);
|
||||
if output.exit_code == 0 {
|
||||
panic!(
|
||||
"Network sandbox FAILED - {cmd:?} exited 0\nstdout:\n{}\nstderr:\n{}",
|
||||
output.stdout.text, output.stderr.text
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user