[core] Stop escalating timeouts (#1853)

## Summary
Escalating out of sandbox is (almost always) not going to fix
long-running commands timing out - therefore we should just pass the
failure back to the model instead of asking the user to re-run a command
that took a long time anyway.

## Testing
- [x] Ran locally with a timeout and confirmed this worked as expected
This commit is contained in:
Dylan
2025-08-05 17:52:25 -07:00
committed by GitHub
parent f6c8d1117c
commit ea7d3f27bd
2 changed files with 34 additions and 19 deletions

View File

@@ -1957,6 +1957,20 @@ async fn handle_sandbox_error(
}; };
} }
// similarly, if the command timed out, we can simply return this failure to the model
if matches!(error, SandboxErr::Timeout) {
return ResponseInputItem::FunctionCallOutput {
call_id,
output: FunctionCallOutputPayload {
content: format!(
"command timed out after {} milliseconds",
params.timeout_duration().as_millis()
),
success: Some(false),
},
};
}
// Note that when `error` is `SandboxErr::Denied`, it could be a false // Note that when `error` is `SandboxErr::Denied`, it could be a false
// positive. That is, it may have exited with a non-zero exit code, not // positive. That is, it may have exited with a non-zero exit code, not
// because the sandbox denied it, but because that is its expected behavior, // because the sandbox denied it, but because that is its expected behavior,

View File

@@ -51,6 +51,12 @@ pub struct ExecParams {
pub env: HashMap<String, String>, pub env: HashMap<String, String>,
} }
impl ExecParams {
pub fn timeout_duration(&self) -> Duration {
Duration::from_millis(self.timeout_ms.unwrap_or(DEFAULT_TIMEOUT_MS))
}
}
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
pub enum SandboxType { pub enum SandboxType {
None, None,
@@ -83,11 +89,9 @@ pub async fn process_exec_tool_call(
{ {
SandboxType::None => exec(params, sandbox_policy, ctrl_c, stdout_stream.clone()).await, SandboxType::None => exec(params, sandbox_policy, ctrl_c, stdout_stream.clone()).await,
SandboxType::MacosSeatbelt => { SandboxType::MacosSeatbelt => {
let timeout = params.timeout_duration();
let ExecParams { let ExecParams {
command, command, cwd, env, ..
cwd,
timeout_ms,
env,
} = params; } = params;
let child = spawn_command_under_seatbelt( let child = spawn_command_under_seatbelt(
command, command,
@@ -97,14 +101,12 @@ pub async fn process_exec_tool_call(
env, env,
) )
.await?; .await?;
consume_truncated_output(child, ctrl_c, timeout_ms, stdout_stream.clone()).await consume_truncated_output(child, ctrl_c, timeout, stdout_stream.clone()).await
} }
SandboxType::LinuxSeccomp => { SandboxType::LinuxSeccomp => {
let timeout = params.timeout_duration();
let ExecParams { let ExecParams {
command, command, cwd, env, ..
cwd,
timeout_ms,
env,
} = params; } = params;
let codex_linux_sandbox_exe = codex_linux_sandbox_exe let codex_linux_sandbox_exe = codex_linux_sandbox_exe
@@ -120,7 +122,7 @@ pub async fn process_exec_tool_call(
) )
.await?; .await?;
consume_truncated_output(child, ctrl_c, timeout_ms, stdout_stream).await consume_truncated_output(child, ctrl_c, timeout, stdout_stream).await
} }
}; };
let duration = start.elapsed(); let duration = start.elapsed();
@@ -255,16 +257,16 @@ pub struct ExecToolCallOutput {
} }
async fn exec( async fn exec(
ExecParams { params: ExecParams,
command,
cwd,
timeout_ms,
env,
}: ExecParams,
sandbox_policy: &SandboxPolicy, sandbox_policy: &SandboxPolicy,
ctrl_c: Arc<Notify>, ctrl_c: Arc<Notify>,
stdout_stream: Option<StdoutStream>, stdout_stream: Option<StdoutStream>,
) -> Result<RawExecToolCallOutput> { ) -> Result<RawExecToolCallOutput> {
let timeout = params.timeout_duration();
let ExecParams {
command, cwd, env, ..
} = params;
let (program, args) = command.split_first().ok_or_else(|| { let (program, args) = command.split_first().ok_or_else(|| {
CodexErr::Io(io::Error::new( CodexErr::Io(io::Error::new(
io::ErrorKind::InvalidInput, io::ErrorKind::InvalidInput,
@@ -282,7 +284,7 @@ async fn exec(
env, env,
) )
.await?; .await?;
consume_truncated_output(child, ctrl_c, timeout_ms, stdout_stream).await consume_truncated_output(child, ctrl_c, timeout, stdout_stream).await
} }
/// Consumes the output of a child process, truncating it so it is suitable for /// Consumes the output of a child process, truncating it so it is suitable for
@@ -290,7 +292,7 @@ async fn exec(
pub(crate) async fn consume_truncated_output( pub(crate) async fn consume_truncated_output(
mut child: Child, mut child: Child,
ctrl_c: Arc<Notify>, ctrl_c: Arc<Notify>,
timeout_ms: Option<u64>, timeout: Duration,
stdout_stream: Option<StdoutStream>, stdout_stream: Option<StdoutStream>,
) -> Result<RawExecToolCallOutput> { ) -> Result<RawExecToolCallOutput> {
// Both stdout and stderr were configured with `Stdio::piped()` // Both stdout and stderr were configured with `Stdio::piped()`
@@ -324,7 +326,6 @@ pub(crate) async fn consume_truncated_output(
)); ));
let interrupted = ctrl_c.notified(); let interrupted = ctrl_c.notified();
let timeout = Duration::from_millis(timeout_ms.unwrap_or(DEFAULT_TIMEOUT_MS));
let exit_status = tokio::select! { let exit_status = tokio::select! {
result = tokio::time::timeout(timeout, child.wait()) => { result = tokio::time::timeout(timeout, child.wait()) => {
match result { match result {