[codex-rs] Improve linux sandbox timeouts (#662)

* Fixes flaking rust unit test * Adds explicit sandbox exec timeout handling
2025-04-25 12:56:20 -07:00
parent 55e25abf78
commit 7d9de34bc7
3 changed files with 40 additions and 11 deletions
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -22,6 +22,14 @@ pub enum SandboxErr {
    #[error("seccomp backend error")]
    SeccompBackend(#[from] seccompiler::BackendError),

+    /// Command timed out
+    #[error("command timed out")]
+    Timeout,
+
+    /// Command was killed by a signal
+    #[error("command was killed by a signal")]
+    Signal(i32),
+
    /// Error from linux landlock
    #[error("Landlock was not able to fully enforce all sandbox rules")]
    LandlockRestrict,
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -1,4 +1,6 @@
 use std::io;
+#[cfg(target_family = "unix")]
+use std::os::unix::process::ExitStatusExt;
 use std::path::PathBuf;
 use std::process::ExitStatus;
 use std::process::Stdio;
@@ -18,17 +20,18 @@ use crate::error::Result;
 use crate::error::SandboxErr;
 use crate::protocol::SandboxPolicy;

-/// Maximum we send for each stream, which is either:
-/// - 10KiB OR
-/// - 256 lines
+// Maximum we send for each stream, which is either:
+// - 10KiB OR
+// - 256 lines
 const MAX_STREAM_OUTPUT: usize = 10 * 1024;
 const MAX_STREAM_OUTPUT_LINES: usize = 256;

 const DEFAULT_TIMEOUT_MS: u64 = 10_000;

-/// Hardcode this since it does not seem worth including the libc craate just
-/// for this.
+// Hardcode these since it does not seem worth including the libc crate just
+// for these.
 const SIGKILL_CODE: i32 = 9;
+const TIMEOUT_CODE: i32 = 64;

 const MACOS_SEATBELT_READONLY_POLICY: &str = include_str!("seatbelt_readonly_policy.sbpl");

@@ -113,10 +116,20 @@ pub async fn process_exec_tool_call(
    let duration = start.elapsed();
    match raw_output_result {
        Ok(raw_output) => {
-            let exit_code = raw_output.exit_status.code().unwrap_or(-1);
            let stdout = String::from_utf8_lossy(&raw_output.stdout).to_string();
            let stderr = String::from_utf8_lossy(&raw_output.stderr).to_string();

+            #[cfg(target_family = "unix")]
+            match raw_output.exit_status.signal() {
+                Some(TIMEOUT_CODE) => return Err(CodexErr::Sandbox(SandboxErr::Timeout)),
+                Some(signal) => {
+                    return Err(CodexErr::Sandbox(SandboxErr::Signal(signal)));
+                }
+                None => {}
+            }
+
+            let exit_code = raw_output.exit_status.code().unwrap_or(-1);
+
            // NOTE(ragona): This is much less restrictive than the previous check. If we exec
            // a command, and it returns anything other than success, we assume that it may have
            // been a sandboxing error and allow the user to retry. (The user of course may choose
@@ -244,7 +257,7 @@ pub async fn exec(
                    // timeout
                    child.start_kill()?;
                    // Debatable whether `child.wait().await` should be called here.
-                    synthetic_exit_status(128 + SIGKILL_CODE)
+                    synthetic_exit_status(128 + TIMEOUT_CODE)
                }
            }
        }
--- a/codex-rs/core/src/linux.rs
+++ b/codex-rs/core/src/linux.rs
@@ -168,11 +168,11 @@ mod tests_linux {
    use tokio::sync::Notify;

    #[allow(clippy::print_stdout)]
-    async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf]) {
+    async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
        let params = ExecParams {
            command: cmd.iter().map(|elm| elm.to_string()).collect(),
            workdir: None,
-            timeout_ms: Some(200),
+            timeout_ms: Some(timeout_ms),
        };
        let res = process_exec_tool_call(
            params,
@@ -193,7 +193,7 @@ mod tests_linux {

    #[tokio::test]
    async fn test_root_read() {
-        run_cmd(&["ls", "-l", "/bin"], &[]).await;
+        run_cmd(&["ls", "-l", "/bin"], &[], 200).await;
    }

    #[tokio::test]
@@ -204,13 +204,14 @@ mod tests_linux {
        run_cmd(
            &["bash", "-lc", &format!("echo blah > {}", tmpfile_path)],
            &[],
+            200,
        )
        .await;
    }

    #[tokio::test]
    async fn test_dev_null_write() {
-        run_cmd(&["echo", "blah", ">", "/dev/null"], &[]).await;
+        run_cmd(&["echo", "blah", ">", "/dev/null"], &[], 200).await;
    }

    #[tokio::test]
@@ -224,10 +225,17 @@ mod tests_linux {
                &format!("echo blah > {}", file_path.to_string_lossy()),
            ],
            &[tmpdir.path().to_path_buf()],
+            500,
        )
        .await;
    }

+    #[tokio::test]
+    #[should_panic(expected = "Sandbox(Timeout)")]
+    async fn test_timeout() {
+        run_cmd(&["sleep", "2"], &[], 50).await;
+    }
+
    /// Helper that runs `cmd` under the Linux sandbox and asserts that the command
    /// does NOT succeed (i.e. returns a non‑zero exit code) **unless** the binary
    /// is missing in which case we silently treat it as an accepted skip so the