From e3565a3f438c30c9d36412d2817346c7accd487c Mon Sep 17 00:00:00 2001 From: Dylan Date: Sun, 3 Aug 2025 13:05:48 -0700 Subject: [PATCH] [sandbox] Filter out certain non-sandbox errors (#1804) ## Summary Users frequently complain about re-approving commands that have failed for non-sandbox reasons. We can't diagnose with complete accuracy which errors happened because of a sandbox failure, but we can start to eliminate some common simple cases. This PR captures the most common case I've seen, which is a `command not found` error. ## Testing - [x] Added unit tests - [x] Ran a few cases locally --- codex-rs/core/src/exec.rs | 26 +++++++++++--- codex-rs/core/src/lib.rs | 3 +- codex-rs/core/tests/exec.rs | 69 +++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 6 deletions(-) create mode 100644 codex-rs/core/tests/exec.rs diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 5301f022..dce02cc5 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -140,11 +140,7 @@ pub async fn process_exec_tool_call( let exit_code = raw_output.exit_status.code().unwrap_or(-1); - // NOTE(ragona): This is much less restrictive than the previous check. If we exec - // a command, and it returns anything other than success, we assume that it may have - // been a sandboxing error and allow the user to retry. (The user of course may choose - // not to retry, or in a non-interactive mode, would automatically reject the approval.) - if exit_code != 0 && sandbox_type != SandboxType::None { + if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) { return Err(CodexErr::Sandbox(SandboxErr::Denied( exit_code, stdout, stderr, ))); @@ -223,6 +219,26 @@ fn create_linux_sandbox_command_args( linux_cmd } +/// We don't have a fully deterministic way to tell if our command failed +/// because of the sandbox - a command in the user's zshrc file might hit an +/// error, but the command itself might fail or succeed for other reasons. +/// For now, we conservatively check for 'command not found' (exit code 127), +/// and can add additional cases as necessary. +fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool { + if sandbox_type == SandboxType::None { + return false; + } + + // Quick rejects: well-known non-sandbox shell exit codes + // 127: command not found, 2: misuse of shell builtins + if exit_code == 127 { + return false; + } + + // For all other cases, we assume the sandbox is the cause + true +} + #[derive(Debug)] pub struct RawExecToolCallOutput { pub exit_status: ExitStatus, diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index a33e185a..80f90149 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -38,7 +38,7 @@ pub mod plan_tool; mod project_doc; pub mod protocol; mod rollout; -mod safety; +pub(crate) mod safety; pub mod seatbelt; pub mod shell; pub mod spawn; @@ -47,3 +47,4 @@ pub mod util; pub use apply_patch::CODEX_APPLY_PATCH_ARG1; pub use client_common::model_supports_reasoning_summaries; +pub use safety::get_platform_sandbox; diff --git a/codex-rs/core/tests/exec.rs b/codex-rs/core/tests/exec.rs new file mode 100644 index 00000000..da169296 --- /dev/null +++ b/codex-rs/core/tests/exec.rs @@ -0,0 +1,69 @@ +#![cfg(target_os = "macos")] +#![expect(clippy::expect_used)] + +use std::collections::HashMap; +use std::sync::Arc; + +use codex_core::exec::ExecParams; +use codex_core::exec::SandboxType; +use codex_core::exec::process_exec_tool_call; +use codex_core::protocol::SandboxPolicy; +use codex_core::spawn::CODEX_SANDBOX_ENV_VAR; +use tempfile::TempDir; +use tokio::sync::Notify; + +use codex_core::get_platform_sandbox; + +async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) { + if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) { + eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test."); + return; + } + + let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type"); + assert_eq!(sandbox_type, SandboxType::MacosSeatbelt); + + let params = ExecParams { + command: cmd.iter().map(|s| s.to_string()).collect(), + cwd: tmp.path().to_path_buf(), + timeout_ms: Some(1000), + env: HashMap::new(), + }; + + let ctrl_c = Arc::new(Notify::new()); + let policy = SandboxPolicy::new_read_only_policy(); + + let result = process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await; + + assert!(result.is_ok() == should_be_ok); +} + +/// Command succeeds with exit code 0 normally +#[tokio::test] +async fn exit_code_0_succeeds() { + let tmp = TempDir::new().expect("should be able to create temp dir"); + let cmd = vec!["echo", "hello"]; + + run_test_cmd(tmp, cmd, true).await +} + +/// Command not found returns exit code 127, this is not considered a sandbox error +#[tokio::test] +async fn exit_command_not_found_is_ok() { + let tmp = TempDir::new().expect("should be able to create temp dir"); + let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"]; + run_test_cmd(tmp, cmd, true).await +} + +/// Writing a file fails and should be considered a sandbox error +#[tokio::test] +async fn write_file_fails_as_sandbox_error() { + let tmp = TempDir::new().expect("should be able to create temp dir"); + let path = tmp.path().join("test.txt"); + let cmd = vec![ + "/user/bin/touch", + path.to_str().expect("should be able to get path"), + ]; + + run_test_cmd(tmp, cmd, false).await; +}