From e3565a3f438c30c9d36412d2817346c7accd487c Mon Sep 17 00:00:00 2001
From: Dylan <dylan.hurd@openai.com>
Date: Sun, 3 Aug 2025 13:05:48 -0700
Subject: [PATCH] [sandbox] Filter out certain non-sandbox errors (#1804)

## Summary
Users frequently complain about re-approving commands that have failed
for non-sandbox reasons. We can't diagnose with complete accuracy which
errors happened because of a sandbox failure, but we can start to
eliminate some common simple cases.

This PR captures the most common case I've seen, which is a `command not
found` error.

## Testing
- [x] Added unit tests
- [x] Ran a few cases locally
---
 codex-rs/core/src/exec.rs   | 26 +++++++++++---
 codex-rs/core/src/lib.rs    |  3 +-
 codex-rs/core/tests/exec.rs | 69 +++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 6 deletions(-)
 create mode 100644 codex-rs/core/tests/exec.rs

diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs
index 5301f022..dce02cc5 100644
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -140,11 +140,7 @@ pub async fn process_exec_tool_call(
 
             let exit_code = raw_output.exit_status.code().unwrap_or(-1);
 
-            // NOTE(ragona): This is much less restrictive than the previous check. If we exec
-            // a command, and it returns anything other than success, we assume that it may have
-            // been a sandboxing error and allow the user to retry. (The user of course may choose
-            // not to retry, or in a non-interactive mode, would automatically reject the approval.)
-            if exit_code != 0 && sandbox_type != SandboxType::None {
+            if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
                 return Err(CodexErr::Sandbox(SandboxErr::Denied(
                     exit_code, stdout, stderr,
                 )));
@@ -223,6 +219,26 @@ fn create_linux_sandbox_command_args(
     linux_cmd
 }
 
+/// We don't have a fully deterministic way to tell if our command failed
+/// because of the sandbox - a command in the user's zshrc file might hit an
+/// error, but the command itself might fail or succeed for other reasons.
+/// For now, we conservatively check for 'command not found' (exit code 127),
+/// and can add additional cases as necessary.
+fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
+    if sandbox_type == SandboxType::None {
+        return false;
+    }
+
+    // Quick rejects: well-known non-sandbox shell exit codes
+    // 127: command not found, 2: misuse of shell builtins
+    if exit_code == 127 {
+        return false;
+    }
+
+    // For all other cases, we assume the sandbox is the cause
+    true
+}
+
 #[derive(Debug)]
 pub struct RawExecToolCallOutput {
     pub exit_status: ExitStatus,
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index a33e185a..80f90149 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -38,7 +38,7 @@ pub mod plan_tool;
 mod project_doc;
 pub mod protocol;
 mod rollout;
-mod safety;
+pub(crate) mod safety;
 pub mod seatbelt;
 pub mod shell;
 pub mod spawn;
@@ -47,3 +47,4 @@ pub mod util;
 
 pub use apply_patch::CODEX_APPLY_PATCH_ARG1;
 pub use client_common::model_supports_reasoning_summaries;
+pub use safety::get_platform_sandbox;
diff --git a/codex-rs/core/tests/exec.rs b/codex-rs/core/tests/exec.rs
new file mode 100644
index 00000000..da169296
--- /dev/null
+++ b/codex-rs/core/tests/exec.rs
@@ -0,0 +1,69 @@
+#![cfg(target_os = "macos")]
+#![expect(clippy::expect_used)]
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use codex_core::exec::ExecParams;
+use codex_core::exec::SandboxType;
+use codex_core::exec::process_exec_tool_call;
+use codex_core::protocol::SandboxPolicy;
+use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
+use tempfile::TempDir;
+use tokio::sync::Notify;
+
+use codex_core::get_platform_sandbox;
+
+async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) {
+    if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
+        eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
+        return;
+    }
+
+    let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
+    assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
+
+    let params = ExecParams {
+        command: cmd.iter().map(|s| s.to_string()).collect(),
+        cwd: tmp.path().to_path_buf(),
+        timeout_ms: Some(1000),
+        env: HashMap::new(),
+    };
+
+    let ctrl_c = Arc::new(Notify::new());
+    let policy = SandboxPolicy::new_read_only_policy();
+
+    let result = process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await;
+
+    assert!(result.is_ok() == should_be_ok);
+}
+
+/// Command succeeds with exit code 0 normally
+#[tokio::test]
+async fn exit_code_0_succeeds() {
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let cmd = vec!["echo", "hello"];
+
+    run_test_cmd(tmp, cmd, true).await
+}
+
+/// Command not found returns exit code 127, this is not considered a sandbox error
+#[tokio::test]
+async fn exit_command_not_found_is_ok() {
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"];
+    run_test_cmd(tmp, cmd, true).await
+}
+
+/// Writing a file fails and should be considered a sandbox error
+#[tokio::test]
+async fn write_file_fails_as_sandbox_error() {
+    let tmp = TempDir::new().expect("should be able to create temp dir");
+    let path = tmp.path().join("test.txt");
+    let cmd = vec![
+        "/user/bin/touch",
+        path.to_str().expect("should be able to get path"),
+    ];
+
+    run_test_cmd(tmp, cmd, false).await;
+}