[sandbox] Filter out certain non-sandbox errors (#1804)
## Summary Users frequently complain about re-approving commands that have failed for non-sandbox reasons. We can't diagnose with complete accuracy which errors happened because of a sandbox failure, but we can start to eliminate some common simple cases. This PR captures the most common case I've seen, which is a `command not found` error. ## Testing - [x] Added unit tests - [x] Ran a few cases locally
This commit is contained in:
@@ -140,11 +140,7 @@ pub async fn process_exec_tool_call(
|
||||
|
||||
let exit_code = raw_output.exit_status.code().unwrap_or(-1);
|
||||
|
||||
// NOTE(ragona): This is much less restrictive than the previous check. If we exec
|
||||
// a command, and it returns anything other than success, we assume that it may have
|
||||
// been a sandboxing error and allow the user to retry. (The user of course may choose
|
||||
// not to retry, or in a non-interactive mode, would automatically reject the approval.)
|
||||
if exit_code != 0 && sandbox_type != SandboxType::None {
|
||||
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
|
||||
return Err(CodexErr::Sandbox(SandboxErr::Denied(
|
||||
exit_code, stdout, stderr,
|
||||
)));
|
||||
@@ -223,6 +219,26 @@ fn create_linux_sandbox_command_args(
|
||||
linux_cmd
|
||||
}
|
||||
|
||||
/// We don't have a fully deterministic way to tell if our command failed
|
||||
/// because of the sandbox - a command in the user's zshrc file might hit an
|
||||
/// error, but the command itself might fail or succeed for other reasons.
|
||||
/// For now, we conservatively check for 'command not found' (exit code 127),
|
||||
/// and can add additional cases as necessary.
|
||||
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
|
||||
if sandbox_type == SandboxType::None {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Quick rejects: well-known non-sandbox shell exit codes
|
||||
// 127: command not found, 2: misuse of shell builtins
|
||||
if exit_code == 127 {
|
||||
return false;
|
||||
}
|
||||
|
||||
// For all other cases, we assume the sandbox is the cause
|
||||
true
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RawExecToolCallOutput {
|
||||
pub exit_status: ExitStatus,
|
||||
|
||||
@@ -38,7 +38,7 @@ pub mod plan_tool;
|
||||
mod project_doc;
|
||||
pub mod protocol;
|
||||
mod rollout;
|
||||
mod safety;
|
||||
pub(crate) mod safety;
|
||||
pub mod seatbelt;
|
||||
pub mod shell;
|
||||
pub mod spawn;
|
||||
@@ -47,3 +47,4 @@ pub mod util;
|
||||
|
||||
pub use apply_patch::CODEX_APPLY_PATCH_ARG1;
|
||||
pub use client_common::model_supports_reasoning_summaries;
|
||||
pub use safety::get_platform_sandbox;
|
||||
|
||||
69
codex-rs/core/tests/exec.rs
Normal file
69
codex-rs/core/tests/exec.rs
Normal file
@@ -0,0 +1,69 @@
|
||||
#![cfg(target_os = "macos")]
|
||||
#![expect(clippy::expect_used)]
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use codex_core::exec::ExecParams;
|
||||
use codex_core::exec::SandboxType;
|
||||
use codex_core::exec::process_exec_tool_call;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_core::spawn::CODEX_SANDBOX_ENV_VAR;
|
||||
use tempfile::TempDir;
|
||||
use tokio::sync::Notify;
|
||||
|
||||
use codex_core::get_platform_sandbox;
|
||||
|
||||
async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>, should_be_ok: bool) {
|
||||
if std::env::var(CODEX_SANDBOX_ENV_VAR) == Ok("seatbelt".to_string()) {
|
||||
eprintln!("{CODEX_SANDBOX_ENV_VAR} is set to 'seatbelt', skipping test.");
|
||||
return;
|
||||
}
|
||||
|
||||
let sandbox_type = get_platform_sandbox().expect("should be able to get sandbox type");
|
||||
assert_eq!(sandbox_type, SandboxType::MacosSeatbelt);
|
||||
|
||||
let params = ExecParams {
|
||||
command: cmd.iter().map(|s| s.to_string()).collect(),
|
||||
cwd: tmp.path().to_path_buf(),
|
||||
timeout_ms: Some(1000),
|
||||
env: HashMap::new(),
|
||||
};
|
||||
|
||||
let ctrl_c = Arc::new(Notify::new());
|
||||
let policy = SandboxPolicy::new_read_only_policy();
|
||||
|
||||
let result = process_exec_tool_call(params, sandbox_type, ctrl_c, &policy, &None, None).await;
|
||||
|
||||
assert!(result.is_ok() == should_be_ok);
|
||||
}
|
||||
|
||||
/// Command succeeds with exit code 0 normally
|
||||
#[tokio::test]
|
||||
async fn exit_code_0_succeeds() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let cmd = vec!["echo", "hello"];
|
||||
|
||||
run_test_cmd(tmp, cmd, true).await
|
||||
}
|
||||
|
||||
/// Command not found returns exit code 127, this is not considered a sandbox error
|
||||
#[tokio::test]
|
||||
async fn exit_command_not_found_is_ok() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let cmd = vec!["/bin/bash", "-c", "nonexistent_command_12345"];
|
||||
run_test_cmd(tmp, cmd, true).await
|
||||
}
|
||||
|
||||
/// Writing a file fails and should be considered a sandbox error
|
||||
#[tokio::test]
|
||||
async fn write_file_fails_as_sandbox_error() {
|
||||
let tmp = TempDir::new().expect("should be able to create temp dir");
|
||||
let path = tmp.path().join("test.txt");
|
||||
let cmd = vec![
|
||||
"/user/bin/touch",
|
||||
path.to_str().expect("should be able to get path"),
|
||||
];
|
||||
|
||||
run_test_cmd(tmp, cmd, false).await;
|
||||
}
|
||||
Reference in New Issue
Block a user