[codex-rs] Improve linux sandbox timeouts (#662)
* Fixes flaking rust unit test * Adds explicit sandbox exec timeout handling
This commit is contained in:
@@ -22,6 +22,14 @@ pub enum SandboxErr {
|
||||
#[error("seccomp backend error")]
|
||||
SeccompBackend(#[from] seccompiler::BackendError),
|
||||
|
||||
/// Command timed out
|
||||
#[error("command timed out")]
|
||||
Timeout,
|
||||
|
||||
/// Command was killed by a signal
|
||||
#[error("command was killed by a signal")]
|
||||
Signal(i32),
|
||||
|
||||
/// Error from linux landlock
|
||||
#[error("Landlock was not able to fully enforce all sandbox rules")]
|
||||
LandlockRestrict,
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
use std::io;
|
||||
#[cfg(target_family = "unix")]
|
||||
use std::os::unix::process::ExitStatusExt;
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitStatus;
|
||||
use std::process::Stdio;
|
||||
@@ -18,17 +20,18 @@ use crate::error::Result;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
|
||||
/// Maximum we send for each stream, which is either:
|
||||
/// - 10KiB OR
|
||||
/// - 256 lines
|
||||
// Maximum we send for each stream, which is either:
|
||||
// - 10KiB OR
|
||||
// - 256 lines
|
||||
const MAX_STREAM_OUTPUT: usize = 10 * 1024;
|
||||
const MAX_STREAM_OUTPUT_LINES: usize = 256;
|
||||
|
||||
const DEFAULT_TIMEOUT_MS: u64 = 10_000;
|
||||
|
||||
/// Hardcode this since it does not seem worth including the libc craate just
|
||||
/// for this.
|
||||
// Hardcode these since it does not seem worth including the libc crate just
|
||||
// for these.
|
||||
const SIGKILL_CODE: i32 = 9;
|
||||
const TIMEOUT_CODE: i32 = 64;
|
||||
|
||||
const MACOS_SEATBELT_READONLY_POLICY: &str = include_str!("seatbelt_readonly_policy.sbpl");
|
||||
|
||||
@@ -113,10 +116,20 @@ pub async fn process_exec_tool_call(
|
||||
let duration = start.elapsed();
|
||||
match raw_output_result {
|
||||
Ok(raw_output) => {
|
||||
let exit_code = raw_output.exit_status.code().unwrap_or(-1);
|
||||
let stdout = String::from_utf8_lossy(&raw_output.stdout).to_string();
|
||||
let stderr = String::from_utf8_lossy(&raw_output.stderr).to_string();
|
||||
|
||||
#[cfg(target_family = "unix")]
|
||||
match raw_output.exit_status.signal() {
|
||||
Some(TIMEOUT_CODE) => return Err(CodexErr::Sandbox(SandboxErr::Timeout)),
|
||||
Some(signal) => {
|
||||
return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
|
||||
let exit_code = raw_output.exit_status.code().unwrap_or(-1);
|
||||
|
||||
// NOTE(ragona): This is much less restrictive than the previous check. If we exec
|
||||
// a command, and it returns anything other than success, we assume that it may have
|
||||
// been a sandboxing error and allow the user to retry. (The user of course may choose
|
||||
@@ -244,7 +257,7 @@ pub async fn exec(
|
||||
// timeout
|
||||
child.start_kill()?;
|
||||
// Debatable whether `child.wait().await` should be called here.
|
||||
synthetic_exit_status(128 + SIGKILL_CODE)
|
||||
synthetic_exit_status(128 + TIMEOUT_CODE)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -168,11 +168,11 @@ mod tests_linux {
|
||||
use tokio::sync::Notify;
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf]) {
|
||||
async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
|
||||
let params = ExecParams {
|
||||
command: cmd.iter().map(|elm| elm.to_string()).collect(),
|
||||
workdir: None,
|
||||
timeout_ms: Some(200),
|
||||
timeout_ms: Some(timeout_ms),
|
||||
};
|
||||
let res = process_exec_tool_call(
|
||||
params,
|
||||
@@ -193,7 +193,7 @@ mod tests_linux {
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_root_read() {
|
||||
run_cmd(&["ls", "-l", "/bin"], &[]).await;
|
||||
run_cmd(&["ls", "-l", "/bin"], &[], 200).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -204,13 +204,14 @@ mod tests_linux {
|
||||
run_cmd(
|
||||
&["bash", "-lc", &format!("echo blah > {}", tmpfile_path)],
|
||||
&[],
|
||||
200,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_dev_null_write() {
|
||||
run_cmd(&["echo", "blah", ">", "/dev/null"], &[]).await;
|
||||
run_cmd(&["echo", "blah", ">", "/dev/null"], &[], 200).await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -224,10 +225,17 @@ mod tests_linux {
|
||||
&format!("echo blah > {}", file_path.to_string_lossy()),
|
||||
],
|
||||
&[tmpdir.path().to_path_buf()],
|
||||
500,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[should_panic(expected = "Sandbox(Timeout)")]
|
||||
async fn test_timeout() {
|
||||
run_cmd(&["sleep", "2"], &[], 50).await;
|
||||
}
|
||||
|
||||
/// Helper that runs `cmd` under the Linux sandbox and asserts that the command
|
||||
/// does NOT succeed (i.e. returns a non‑zero exit code) **unless** the binary
|
||||
/// is missing in which case we silently treat it as an accepted skip so the
|
||||
|
||||
Reference in New Issue
Block a user