chore: sandbox refactor 2 (#4653)

Revert the revert and fix the UI issue
2025-10-03 11:17:39 +01:00
parent 69ac5153d4
commit 69cb72f842
14 changed files with 1437 additions and 414 deletions
--- a/codex-rs/core/src/executor/backends.rs
+++ b/codex-rs/core/src/executor/backends.rs
@@ -0,0 +1,101 @@
+use std::collections::HashMap;
+use std::env;
+
+use async_trait::async_trait;
+
+use crate::CODEX_APPLY_PATCH_ARG1;
+use crate::apply_patch::ApplyPatchExec;
+use crate::exec::ExecParams;
+use crate::function_tool::FunctionCallError;
+
+/// Selects which execution backend handles a request.
+pub(crate) enum ExecutionMode {
+    /// Run the request's command through the shell backend, which leaves the
+    /// parameters untouched.
+    Shell,
+    /// Apply a patch; carries the patch details that the apply_patch backend
+    /// turns into an invocation of the current executable.
+    ApplyPatch(ApplyPatchExec),
+}
+
+#[async_trait]
+/// Backend-specific hooks that prepare execution requests and control output
+/// streaming for a given [`ExecutionMode`].
+pub(crate) trait ExecutionBackend: Send + Sync {
+    /// Validates `mode` for this backend and returns the parameters that should
+    /// actually be executed.
+    ///
+    /// # Errors
+    ///
+    /// The backends in this file return [`FunctionCallError::RespondToModel`]
+    /// when they are handed a mode they do not handle.
+    fn prepare(
+        &self,
+        params: ExecParams,
+        // The apply_patch backend reads the patch payload carried by the mode.
+        mode: &ExecutionMode,
+    ) -> Result<ExecParams, FunctionCallError>;
+
+    /// Whether the caller-supplied stdout stream should be used for this run.
+    /// Defaults to `true`.
+    fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
+        true
+    }
+}
+
+// Both backends are unit structs, so a single shared instance of each suffices.
+static SHELL_BACKEND: ShellBackend = ShellBackend;
+static APPLY_PATCH_BACKEND: ApplyPatchBackend = ApplyPatchBackend;
+
+/// Returns the shared backend instance responsible for `mode`.
+pub(crate) fn backend_for_mode(mode: &ExecutionMode) -> &'static dyn ExecutionBackend {
+    match mode {
+        ExecutionMode::Shell => &SHELL_BACKEND,
+        ExecutionMode::ApplyPatch(_) => &APPLY_PATCH_BACKEND,
+    }
+}
+
+/// Backend for plain shell commands: the parameters are executed unchanged.
+struct ShellBackend;
+
+#[async_trait]
+impl ExecutionBackend for ShellBackend {
+    /// Returns `params` untouched for [`ExecutionMode::Shell`].
+    ///
+    /// # Errors
+    ///
+    /// Returns [`FunctionCallError::RespondToModel`] when invoked with any
+    /// other mode.
+    fn prepare(
+        &self,
+        params: ExecParams,
+        mode: &ExecutionMode,
+    ) -> Result<ExecParams, FunctionCallError> {
+        // Match every variant explicitly (no `_` arm) so that adding a new
+        // `ExecutionMode` forces this backend to be revisited at compile time.
+        match mode {
+            ExecutionMode::Shell => Ok(params),
+            ExecutionMode::ApplyPatch(_) => Err(FunctionCallError::RespondToModel(
+                "shell backend invoked with non-shell mode".to_string(),
+            )),
+        }
+    }
+}
+
+/// Backend that applies a patch by re-invoking the current executable with the
+/// apply_patch marker argument followed by the patch text.
+struct ApplyPatchBackend;
+
+#[async_trait]
+impl ExecutionBackend for ApplyPatchBackend {
+    /// Builds the self-invocation that applies the patch carried by `mode`.
+    ///
+    /// The timeout, escalation flag and justification are taken from `params`;
+    /// the command, working directory and environment are replaced.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`FunctionCallError::RespondToModel`] when `mode` is not
+    /// [`ExecutionMode::ApplyPatch`] or the current executable path cannot be
+    /// determined.
+    fn prepare(
+        &self,
+        params: ExecParams,
+        mode: &ExecutionMode,
+    ) -> Result<ExecParams, FunctionCallError> {
+        match mode {
+            ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
+                "apply_patch backend invoked without patch context".to_string(),
+            )),
+            ExecutionMode::ApplyPatch(exec) => {
+                let codex_exe = env::current_exe()
+                    .map(|exe| exe.to_string_lossy().into_owned())
+                    .map_err(|_| {
+                        FunctionCallError::RespondToModel(
+                            "failed to determine path to codex executable".to_string(),
+                        )
+                    })?;
+
+                let command = vec![
+                    codex_exe,
+                    CODEX_APPLY_PATCH_ARG1.to_string(),
+                    exec.action.patch.clone(),
+                ];
+
+                Ok(ExecParams {
+                    command,
+                    cwd: exec.action.cwd.clone(),
+                    timeout_ms: params.timeout_ms,
+                    // Start from an empty environment: keeps the patch process
+                    // deterministic and stops host variables leaking into it.
+                    env: HashMap::new(),
+                    with_escalated_permissions: params.with_escalated_permissions,
+                    justification: params.justification,
+                })
+            }
+        }
+    }
+
+    /// The caller-supplied stdout stream is never used for apply_patch runs.
+    fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
+        false
+    }
+}
--- a/codex-rs/core/src/executor/cache.rs
+++ b/codex-rs/core/src/executor/cache.rs
@@ -0,0 +1,51 @@
+use std::collections::HashSet;
+use std::sync::Arc;
+use std::sync::Mutex;
+
+/// Thread-safe store of user approvals so repeated commands can reuse
+/// previously granted trust.
+///
+/// Clones share the same underlying set.
+#[derive(Clone, Debug, Default)]
+pub(crate) struct ApprovalCache {
+    inner: Arc<Mutex<HashSet<Vec<String>>>>,
+}
+
+impl ApprovalCache {
+    /// Records `command` as approved. Empty commands are ignored.
+    pub(crate) fn insert(&self, command: Vec<String>) {
+        if command.is_empty() {
+            return;
+        }
+        // A poisoned lock only means another thread panicked while holding it;
+        // the set itself is still usable, so recover the guard instead of
+        // silently dropping the approval.
+        self.inner
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner)
+            .insert(command);
+    }
+
+    /// Returns a copy of every command approved so far.
+    pub(crate) fn snapshot(&self) -> HashSet<Vec<String>> {
+        // Recover from poisoning for the same reason as in `insert`; returning
+        // an empty set here would silently forget earlier approvals.
+        self.inner
+            .lock()
+            .unwrap_or_else(std::sync::PoisonError::into_inner)
+            .clone()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    /// Covers the two `insert` rules: empty commands are dropped and a repeated
+    /// command leaves the snapshot unchanged.
+    #[test]
+    fn insert_ignores_empty_and_dedupes() {
+        let cache = ApprovalCache::default();
+
+        // Empty should be ignored
+        cache.insert(vec![]);
+        assert!(cache.snapshot().is_empty());
+
+        // Insert a command and verify snapshot contains it
+        let cmd = vec!["foo".to_string(), "bar".to_string()];
+        cache.insert(cmd.clone());
+        let snap1 = cache.snapshot();
+        assert!(snap1.contains(&cmd));
+
+        // Reinserting should not create duplicates
+        cache.insert(cmd);
+        let snap2 = cache.snapshot();
+        assert_eq!(snap1, snap2);
+    }
+}
--- a/codex-rs/core/src/executor/mod.rs
+++ b/codex-rs/core/src/executor/mod.rs
@@ -0,0 +1,64 @@
+mod backends;
+mod cache;
+mod runner;
+mod sandbox;
+
+pub(crate) use backends::ExecutionMode;
+pub(crate) use runner::ExecutionRequest;
+pub(crate) use runner::Executor;
+pub(crate) use runner::ExecutorConfig;
+pub(crate) use runner::normalize_exec_result;
+
+/// Glue types that bundle an execution request with the command context it
+/// runs under.
+pub(crate) mod linkers {
+    use crate::codex::ExecCommandContext;
+    use crate::exec::ExecParams;
+    use crate::exec::StdoutStream;
+    use crate::executor::backends::ExecutionMode;
+    use crate::executor::runner::ExecutionRequest;
+
+    /// An [`ExecutionRequest`] paired with its [`ExecCommandContext`].
+    pub struct PreparedExec {
+        pub(crate) context: ExecCommandContext,
+        pub(crate) request: ExecutionRequest,
+    }
+
+    impl PreparedExec {
+        /// Assembles the [`ExecutionRequest`] from its parts and stores it
+        /// alongside `context`.
+        pub fn new(
+            context: ExecCommandContext,
+            params: ExecParams,
+            approval_command: Vec<String>,
+            mode: ExecutionMode,
+            stdout_stream: Option<StdoutStream>,
+            use_shell_profile: bool,
+        ) -> Self {
+            let request = ExecutionRequest {
+                params,
+                approval_command,
+                mode,
+                stdout_stream,
+                use_shell_profile,
+            };
+
+            Self { context, request }
+        }
+    }
+}
+
+/// Error type returned by the executor.
+pub mod errors {
+    use crate::error::CodexErr;
+    use crate::function_tool::FunctionCallError;
+    use thiserror::Error;
+
+    /// Failure of an execution request; both variants display as the error
+    /// they wrap.
+    #[derive(Debug, Error)]
+    pub enum ExecError {
+        /// The request was rejected with a message for the model.
+        #[error(transparent)]
+        Function(#[from] FunctionCallError),
+        /// A [`CodexErr`] surfaced while handling the request.
+        #[error(transparent)]
+        Codex(#[from] CodexErr),
+    }
+
+    impl ExecError {
+        /// Builds an [`ExecError::Function`] that carries `msg` as a
+        /// [`FunctionCallError::RespondToModel`].
+        pub(crate) fn rejection(msg: impl Into<String>) -> Self {
+            FunctionCallError::RespondToModel(msg.into()).into()
+        }
+    }
+}
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -0,0 +1,408 @@
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::sync::RwLock;
+use std::time::Duration;
+
+use super::backends::ExecutionMode;
+use super::backends::backend_for_mode;
+use super::cache::ApprovalCache;
+use crate::codex::ExecCommandContext;
+use crate::codex::Session;
+use crate::error::CodexErr;
+use crate::error::SandboxErr;
+use crate::error::get_error_message_ui;
+use crate::exec::ExecParams;
+use crate::exec::ExecToolCallOutput;
+use crate::exec::SandboxType;
+use crate::exec::StdoutStream;
+use crate::exec::StreamOutput;
+use crate::exec::process_exec_tool_call;
+use crate::executor::errors::ExecError;
+use crate::executor::sandbox::select_sandbox;
+use crate::function_tool::FunctionCallError;
+use crate::protocol::AskForApproval;
+use crate::protocol::ReviewDecision;
+use crate::protocol::SandboxPolicy;
+use crate::shell;
+use codex_otel::otel_event_manager::ToolDecisionSource;
+
+/// Sandbox settings the executor snapshots at the start of every run.
+#[derive(Clone, Debug)]
+pub(crate) struct ExecutorConfig {
+    /// Policy consulted by the safety checks and forwarded to the process
+    /// launcher.
+    pub(crate) sandbox_policy: SandboxPolicy,
+    /// Directory forwarded to the process launcher together with the policy.
+    pub(crate) sandbox_cwd: PathBuf,
+    /// Forwarded to the process launcher as-is.
+    // NOTE(review): presumably the path to the Linux sandbox helper binary — confirm.
+    codex_linux_sandbox_exe: Option<PathBuf>,
+}
+
+impl ExecutorConfig {
+    /// Creates a configuration from its three components.
+    pub(crate) fn new(
+        sandbox_policy: SandboxPolicy,
+        sandbox_cwd: PathBuf,
+        codex_linux_sandbox_exe: Option<PathBuf>,
+    ) -> Self {
+        Self {
+            sandbox_policy,
+            sandbox_cwd,
+            codex_linux_sandbox_exe,
+        }
+    }
+}
+
+/// Coordinates sandbox selection, backend-specific preparation, and command
+/// execution for tool calls requested by the model.
+pub(crate) struct Executor {
+    /// Commands the user approved for the rest of the session.
+    approval_cache: ApprovalCache,
+    /// Current sandbox settings; replaced in place by `update_environment`.
+    config: Arc<RwLock<ExecutorConfig>>,
+}
+
+impl Executor {
+    /// Creates an executor with an empty approval cache and the given
+    /// configuration.
+    pub(crate) fn new(config: ExecutorConfig) -> Self {
+        Self {
+            approval_cache: ApprovalCache::default(),
+            config: Arc::new(RwLock::new(config)),
+        }
+    }
+
+    /// Updates the sandbox policy and working directory used for future
+    /// executions without recreating the executor.
+    ///
+    /// The Linux sandbox executable path is left unchanged.
+    // NOTE(review): if the lock is poisoned the update is dropped silently;
+    // confirm that best-effort behaviour is intended.
+    pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) {
+        if let Ok(mut cfg) = self.config.write() {
+            cfg.sandbox_policy = sandbox_policy;
+            cfg.sandbox_cwd = sandbox_cwd;
+        }
+    }
+
+    /// Executes `request` end-to-end and returns the command output.
+    ///
+    /// The run translates shell commands when required, lets the backend
+    /// prepare the parameters, snapshots the sandbox configuration, chooses a
+    /// sandbox (prompting the user when policy demands it), launches the
+    /// command, and offers an unsandboxed retry when the sandbox rejected it.
+    ///
+    /// # Errors
+    ///
+    /// Returns an [`ExecError`] when preparation fails, the configuration lock
+    /// is poisoned, the command is rejected, or execution itself fails.
+    /// Sandbox timeouts are passed through unchanged and never retried.
+    pub(crate) async fn run(
+        &self,
+        mut request: ExecutionRequest,
+        session: &Session,
+        approval_policy: AskForApproval,
+        context: &ExecCommandContext,
+    ) -> Result<ExecToolCallOutput, ExecError> {
+        if matches!(request.mode, ExecutionMode::Shell) {
+            request.params =
+                maybe_translate_shell_command(request.params, session, request.use_shell_profile);
+        }
+
+        // Backend hooks: decide whether stdout is streamed, then normalise the
+        // parameters.
+        let backend = backend_for_mode(&request.mode);
+        let stdout_stream = backend
+            .stream_stdout(&request.mode)
+            .then(|| request.stdout_stream.clone())
+            .flatten();
+        request.params = backend.prepare(request.params, &request.mode)?;
+
+        // Clone the configuration so concurrent updates cannot affect this run.
+        let config = self
+            .config
+            .read()
+            .map_err(|_| ExecError::rejection("executor config poisoned"))?
+            .clone();
+
+        // Pick the sandbox; this may prompt the user for approval.
+        let sandbox_decision = select_sandbox(
+            &request,
+            approval_policy,
+            self.approval_cache.snapshot(),
+            &config,
+            session,
+            &context.sub_id,
+            &context.call_id,
+            &context.otel_event_manager,
+        )
+        .await?;
+        if sandbox_decision.record_session_approval {
+            self.approval_cache.insert(request.approval_command.clone());
+        }
+
+        // First attempt, inside whatever sandbox was selected.
+        let first_attempt = self
+            .spawn(
+                request.params.clone(),
+                sandbox_decision.initial_sandbox,
+                &config,
+                stdout_stream.clone(),
+            )
+            .await;
+
+        match first_attempt {
+            Ok(output) => Ok(output),
+            // Timeouts keep their captured output and are not retried.
+            Err(timeout @ CodexErr::Sandbox(SandboxErr::Timeout { .. })) => Err(timeout.into()),
+            // Any other sandbox failure may be retried without isolation.
+            Err(CodexErr::Sandbox(error)) if sandbox_decision.escalate_on_failure => {
+                self.retry_without_sandbox(
+                    &request,
+                    &config,
+                    session,
+                    context,
+                    stdout_stream,
+                    error,
+                )
+                .await
+            }
+            Err(CodexErr::Sandbox(error)) => {
+                Err(ExecError::rejection(sandbox_failure_message(error)))
+            }
+            Err(err) => Err(err.into()),
+        }
+    }
+
+    /// Asks the user whether a command that failed inside the sandbox may be
+    /// rerun without isolation and, if approved, reruns it.
+    ///
+    /// An approval for the whole session is also recorded in the approval
+    /// cache.
+    ///
+    /// # Errors
+    ///
+    /// Returns a rejection when the user denies or aborts, or the error of the
+    /// unsandboxed run.
+    async fn retry_without_sandbox(
+        &self,
+        request: &ExecutionRequest,
+        config: &ExecutorConfig,
+        session: &Session,
+        context: &ExecCommandContext,
+        stdout_stream: Option<StdoutStream>,
+        sandbox_error: SandboxErr,
+    ) -> Result<ExecToolCallOutput, ExecError> {
+        session
+            .notify_background_event(
+                &context.sub_id,
+                format!("Execution failed: {sandbox_error}"),
+            )
+            .await;
+
+        let decision = session
+            .request_command_approval(
+                context.sub_id.to_string(),
+                context.call_id.to_string(),
+                request.approval_command.clone(),
+                request.params.cwd.clone(),
+                Some("command failed; retry without sandbox?".to_string()),
+            )
+            .await;
+
+        context.otel_event_manager.tool_decision(
+            &context.tool_name,
+            &context.call_id,
+            decision,
+            ToolDecisionSource::User,
+        );
+
+        match decision {
+            ReviewDecision::Denied | ReviewDecision::Abort => {
+                return Err(ExecError::rejection("exec command rejected by user"));
+            }
+            ReviewDecision::ApprovedForSession => {
+                self.approval_cache.insert(request.approval_command.clone());
+            }
+            ReviewDecision::Approved => {}
+        }
+
+        session
+            .notify_background_event(&context.sub_id, "retrying command without sandbox")
+            .await;
+
+        self.spawn(
+            request.params.clone(),
+            SandboxType::None,
+            config,
+            stdout_stream,
+        )
+        .await
+        .map_err(ExecError::from)
+    }
+
+    /// Launches `params` under `sandbox` by delegating to
+    /// `process_exec_tool_call` with the settings from `config`.
+    async fn spawn(
+        &self,
+        params: ExecParams,
+        sandbox: SandboxType,
+        config: &ExecutorConfig,
+        stdout_stream: Option<StdoutStream>,
+    ) -> Result<ExecToolCallOutput, CodexErr> {
+        process_exec_tool_call(
+            params,
+            sandbox,
+            &config.sandbox_policy,
+            &config.sandbox_cwd,
+            &config.codex_linux_sandbox_exe,
+            stdout_stream,
+        )
+        .await
+    }
+}
+
+/// Rewrites `params.command` into the user's default shell invocation when the
+/// user shell is PowerShell or `use_shell_profile` is set.
+///
+/// The parameters are returned unchanged when no translation applies or the
+/// shell offers no invocation for the command.
+fn maybe_translate_shell_command(
+    params: ExecParams,
+    session: &Session,
+    use_shell_profile: bool,
+) -> ExecParams {
+    let user_shell = session.user_shell();
+    let wants_translation =
+        use_shell_profile || matches!(user_shell, shell::Shell::PowerShell(_));
+    if !wants_translation {
+        return params;
+    }
+
+    match user_shell.format_default_shell_invocation(params.command.clone()) {
+        Some(command) => ExecParams { command, ..params },
+        None => params,
+    }
+}
+
+/// Formats a sandbox error as the message returned when no unsandboxed retry
+/// is offered.
+fn sandbox_failure_message(error: SandboxErr) -> String {
+    let detail = get_error_message_ui(&CodexErr::Sandbox(error));
+    format!("failed in sandbox: {detail}")
+}
+
+/// Everything the executor needs to run one command.
+pub(crate) struct ExecutionRequest {
+    /// Parameters of the command; the backend may rewrite them before launch.
+    pub params: ExecParams,
+    /// Command shown in approval prompts and stored in the approval cache.
+    pub approval_command: Vec<String>,
+    /// Selects the backend that prepares the request.
+    pub mode: ExecutionMode,
+    /// Stream for stdout, used only when the backend allows streaming.
+    pub stdout_stream: Option<StdoutStream>,
+    /// Forces shell-command translation even when the user shell is not
+    /// PowerShell.
+    pub use_shell_profile: bool,
+}
+
+/// Uniform view over an execution result that always exposes an
+/// [`ExecToolCallOutput`].
+///
+/// Exactly one source of output exists per value, so the state is modelled as
+/// an enum rather than a pair of optional fields.
+pub(crate) enum NormalizedExecOutput<'a> {
+    /// Output borrowed from the original result.
+    Borrowed(&'a ExecToolCallOutput),
+    /// Output synthesized from an error message. Boxed to keep the enum small;
+    /// this variant is only built on the error path.
+    Synthetic(Box<ExecToolCallOutput>),
+}
+
+impl<'a> NormalizedExecOutput<'a> {
+    /// Returns the output to report, whether borrowed or synthesized.
+    pub(crate) fn event_output(&'a self) -> &'a ExecToolCallOutput {
+        match self {
+            Self::Borrowed(output) => *output,
+            Self::Synthetic(output) => &**output,
+        }
+    }
+}
+
+/// Converts a raw execution result into a uniform view that always exposes an
+/// [`ExecToolCallOutput`], synthesizing error output when the command fails
+/// before producing a response.
+///
+/// Successful results and sandbox timeouts borrow the output they already
+/// carry. Every other error becomes a synthetic output with exit code `-1`
+/// whose stderr and aggregated output hold the error message.
+pub(crate) fn normalize_exec_result(
+    result: &Result<ExecToolCallOutput, ExecError>,
+) -> NormalizedExecOutput<'_> {
+    match result {
+        Ok(output) => NormalizedExecOutput::Borrowed(output),
+        Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => {
+            NormalizedExecOutput::Borrowed(output.as_ref())
+        }
+        Err(err) => {
+            let message = match err {
+                ExecError::Function(FunctionCallError::RespondToModel(msg)) => msg.clone(),
+                ExecError::Codex(e) => get_error_message_ui(e),
+            };
+            NormalizedExecOutput::Synthetic(Box::new(ExecToolCallOutput {
+                exit_code: -1,
+                stdout: StreamOutput::new(String::new()),
+                stderr: StreamOutput::new(message.clone()),
+                aggregated_output: StreamOutput::new(message),
+                duration: Duration::default(),
+                timed_out: false,
+            }))
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::error::CodexErr;
+    use crate::error::EnvVarError;
+    use crate::error::SandboxErr;
+    use crate::exec::StreamOutput;
+    use pretty_assertions::assert_eq;
+
+    /// Builds a failed output (exit code 1) whose aggregated output is `text`.
+    fn make_output(text: &str) -> ExecToolCallOutput {
+        ExecToolCallOutput {
+            exit_code: 1,
+            stdout: StreamOutput::new(String::new()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new(text.to_string()),
+            duration: Duration::from_millis(123),
+            timed_out: false,
+        }
+    }
+
+    /// A successful result exposes its own output.
+    #[test]
+    fn normalize_success_borrows() {
+        let out = make_output("ok");
+        let result: Result<ExecToolCallOutput, ExecError> = Ok(out);
+        let normalized = normalize_exec_result(&result);
+        assert_eq!(normalized.event_output().aggregated_output.text, "ok");
+    }
+
+    /// A sandbox timeout exposes the output carried inside the error.
+    #[test]
+    fn normalize_timeout_borrows_embedded_output() {
+        let out = make_output("timed out payload");
+        let err = CodexErr::Sandbox(SandboxErr::Timeout {
+            output: Box::new(out),
+        });
+        let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(err));
+        let normalized = normalize_exec_result(&result);
+        assert_eq!(
+            normalized.event_output().aggregated_output.text,
+            "timed out payload"
+        );
+    }
+
+    /// The failure message for a denied run is built from the captured stderr.
+    #[test]
+    fn sandbox_failure_message_uses_denied_stderr() {
+        let output = ExecToolCallOutput {
+            exit_code: 101,
+            stdout: StreamOutput::new(String::new()),
+            stderr: StreamOutput::new("sandbox stderr".to_string()),
+            aggregated_output: StreamOutput::new(String::new()),
+            duration: Duration::from_millis(10),
+            timed_out: false,
+        };
+        let err = SandboxErr::Denied {
+            output: Box::new(output),
+        };
+        let message = sandbox_failure_message(err);
+        assert_eq!(message, "failed in sandbox: sandbox stderr");
+    }
+
+    /// A function-call error is turned into synthetic output carrying its message.
+    #[test]
+    fn normalize_function_error_synthesizes_payload() {
+        let err = FunctionCallError::RespondToModel("boom".to_string());
+        let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Function(err));
+        let normalized = normalize_exec_result(&result);
+        assert_eq!(normalized.event_output().aggregated_output.text, "boom");
+    }
+
+    /// A non-sandbox `CodexErr` is turned into synthetic output carrying the
+    /// user-facing message.
+    #[test]
+    fn normalize_codex_error_synthesizes_user_message() {
+        // Use a simple EnvVar error which formats to a clear message
+        let e = CodexErr::EnvVar(EnvVarError {
+            var: "FOO".to_string(),
+            instructions: Some("set it".to_string()),
+        });
+        let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(e));
+        let normalized = normalize_exec_result(&result);
+        assert!(
+            normalized
+                .event_output()
+                .aggregated_output
+                .text
+                .contains("Missing environment variable: `FOO`"),
+            "expected synthesized user-friendly message"
+        );
+    }
+}
--- a/codex-rs/core/src/executor/sandbox.rs
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -0,0 +1,405 @@
+use crate::apply_patch::ApplyPatchExec;
+use crate::codex::Session;
+use crate::exec::SandboxType;
+use crate::executor::ExecutionMode;
+use crate::executor::ExecutionRequest;
+use crate::executor::ExecutorConfig;
+use crate::executor::errors::ExecError;
+use crate::safety::SafetyCheck;
+use crate::safety::assess_command_safety;
+use crate::safety::assess_patch_safety;
+use codex_otel::otel_event_manager::OtelEventManager;
+use codex_otel::otel_event_manager::ToolDecisionSource;
+use codex_protocol::protocol::AskForApproval;
+use codex_protocol::protocol::ReviewDecision;
+use std::collections::HashSet;
+
+/// Sandbox placement options selected for an execution run, including whether
+/// to escalate after failures and whether approvals should persist.
+pub(crate) struct SandboxDecision {
+    /// Sandbox used for the first attempt.
+    pub(crate) initial_sandbox: SandboxType,
+    /// Whether a sandbox failure should offer the user an unsandboxed retry.
+    pub(crate) escalate_on_failure: bool,
+    /// Whether the approval command should be stored in the approval cache.
+    pub(crate) record_session_approval: bool,
+}
+
+impl SandboxDecision {
+    /// Decision reached without storing a session approval: run under
+    /// `sandbox`, with the given escalation behaviour.
+    fn auto(sandbox: SandboxType, escalate_on_failure: bool) -> Self {
+        Self {
+            initial_sandbox: sandbox,
+            escalate_on_failure,
+            record_session_approval: false,
+        }
+    }
+
+    /// Decision for a run the user approved: no sandbox and no escalation.
+    /// `record_session_approval` controls whether the approval is stored.
+    fn user_override(record_session_approval: bool) -> Self {
+        Self {
+            initial_sandbox: SandboxType::None,
+            escalate_on_failure: false,
+            record_session_approval,
+        }
+    }
+}
+
+/// Returns `true` when a sandbox failure should offer an unsandboxed retry:
+/// the approval policy is `UnlessTrusted` or `OnFailure` and the command
+/// actually ran inside a platform sandbox.
+fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
+    let policy_allows_retry = matches!(
+        approval,
+        AskForApproval::UnlessTrusted | AskForApproval::OnFailure
+    );
+    let ran_sandboxed = matches!(
+        sandbox,
+        SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
+    );
+    policy_allows_retry && ran_sandboxed
+}
+
+/// Determines how a command should be sandboxed, prompting the user when
+/// policy requires explicit approval.
+///
+/// Shell requests are assessed against `approval_cache` and may prompt through
+/// `session`; apply_patch requests are assessed without prompting.
+///
+/// # Errors
+///
+/// Returns a rejection when the command or patch is refused by policy or by
+/// the user.
+#[allow(clippy::too_many_arguments)]
+pub async fn select_sandbox(
+    request: &ExecutionRequest,
+    approval_policy: AskForApproval,
+    approval_cache: HashSet<Vec<String>>,
+    config: &ExecutorConfig,
+    session: &Session,
+    sub_id: &str,
+    call_id: &str,
+    otel_event_manager: &OtelEventManager,
+) -> Result<SandboxDecision, ExecError> {
+    match &request.mode {
+        ExecutionMode::Shell => {
+            select_shell_sandbox(
+                request,
+                approval_policy,
+                approval_cache,
+                config,
+                session,
+                sub_id,
+                call_id,
+                otel_event_manager,
+            )
+            .await
+        }
+        ExecutionMode::ApplyPatch(exec) => {
+            select_apply_patch_sandbox(exec, approval_policy, config)
+        }
+    }
+}
+
+/// Decides sandbox placement for a shell command.
+///
+/// The command is assessed against the approval policy, the sandbox policy and
+/// the commands already approved this session. Depending on the outcome it is
+/// auto-approved, the user is prompted, or the command is rejected. Every
+/// approval decision is reported to `otel_event_manager`.
+///
+/// # Errors
+///
+/// Returns a rejection when the safety check refuses the command or the user
+/// denies or aborts the prompt.
+#[allow(clippy::too_many_arguments)]
+async fn select_shell_sandbox(
+    request: &ExecutionRequest,
+    approval_policy: AskForApproval,
+    approved_snapshot: HashSet<Vec<String>>,
+    config: &ExecutorConfig,
+    session: &Session,
+    sub_id: &str,
+    call_id: &str,
+    otel_event_manager: &OtelEventManager,
+) -> Result<SandboxDecision, ExecError> {
+    // Prefer the approval command; fall back to the raw command when none was
+    // supplied. The same command is used for the safety check and the prompt
+    // so the user is never asked to approve an empty command.
+    let command_for_safety = if request.approval_command.is_empty() {
+        &request.params.command
+    } else {
+        &request.approval_command
+    };
+
+    let safety = assess_command_safety(
+        command_for_safety,
+        approval_policy,
+        &config.sandbox_policy,
+        &approved_snapshot,
+        request.params.with_escalated_permissions.unwrap_or(false),
+    );
+
+    match safety {
+        SafetyCheck::AutoApprove {
+            sandbox_type,
+            user_explicitly_approved,
+        } => {
+            let decision = SandboxDecision {
+                record_session_approval: user_explicitly_approved,
+                ..SandboxDecision::auto(
+                    sandbox_type,
+                    should_escalate_on_failure(approval_policy, sandbox_type),
+                )
+            };
+            // An explicit earlier approval is attributed to the user; anything
+            // else was allowed by configuration.
+            let (decision_for_event, source) = if user_explicitly_approved {
+                (ReviewDecision::ApprovedForSession, ToolDecisionSource::User)
+            } else {
+                (ReviewDecision::Approved, ToolDecisionSource::Config)
+            };
+            otel_event_manager.tool_decision("local_shell", call_id, decision_for_event, source);
+            Ok(decision)
+        }
+        SafetyCheck::AskUser => {
+            let decision = session
+                .request_command_approval(
+                    sub_id.to_string(),
+                    call_id.to_string(),
+                    command_for_safety.clone(),
+                    request.params.cwd.clone(),
+                    request.params.justification.clone(),
+                )
+                .await;
+
+            otel_event_manager.tool_decision(
+                "local_shell",
+                call_id,
+                decision,
+                ToolDecisionSource::User,
+            );
+            match decision {
+                ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)),
+                ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)),
+                ReviewDecision::Denied | ReviewDecision::Abort => {
+                    Err(ExecError::rejection("exec command rejected by user"))
+                }
+            }
+        }
+        SafetyCheck::Reject { reason } => Err(ExecError::rejection(format!(
+            "exec command rejected: {reason}"
+        ))),
+    }
+}
+
+/// Decides sandbox placement for an apply_patch run without prompting.
+///
+/// A patch the user explicitly approved runs unsandboxed. Otherwise the patch
+/// is assessed against the approval and sandbox policies.
+///
+/// # Errors
+///
+/// Returns a rejection when the safety check refuses the patch or asks for an
+/// approval that was not recorded on `exec`.
+fn select_apply_patch_sandbox(
+    exec: &ApplyPatchExec,
+    approval_policy: AskForApproval,
+    config: &ExecutorConfig,
+) -> Result<SandboxDecision, ExecError> {
+    if exec.user_explicitly_approved_this_action {
+        return Ok(SandboxDecision::user_override(false));
+    }
+
+    match assess_patch_safety(
+        &exec.action,
+        approval_policy,
+        &config.sandbox_policy,
+        &config.sandbox_cwd,
+    ) {
+        SafetyCheck::AutoApprove { sandbox_type, .. } => Ok(SandboxDecision::auto(
+            sandbox_type,
+            should_escalate_on_failure(approval_policy, sandbox_type),
+        )),
+        // NOTE(review): presumably the caller prompts before building the
+        // request, so reaching this arm means no approval was given — confirm.
+        SafetyCheck::AskUser => Err(ExecError::rejection(
+            "patch requires approval but none was recorded",
+        )),
+        SafetyCheck::Reject { reason } => {
+            Err(ExecError::rejection(format!("patch rejected: {reason}")))
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::codex::make_session_and_context;
+    use crate::exec::ExecParams;
+    use crate::function_tool::FunctionCallError;
+    use crate::protocol::SandboxPolicy;
+    use codex_apply_patch::ApplyPatchAction;
+    use pretty_assertions::assert_eq;
+
+    #[tokio::test]
+    async fn select_apply_patch_user_override_when_explicit() {
+        let (session, ctx) = make_session_and_context();
+        let tmp = tempfile::tempdir().expect("tmp");
+        let p = tmp.path().join("a.txt");
+        let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
+        let exec = ApplyPatchExec {
+            action,
+            user_explicitly_approved_this_action: true,
+        };
+        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
+        let request = ExecutionRequest {
+            params: ExecParams {
+                command: vec!["apply_patch".into()],
+                cwd: std::env::temp_dir(),
+                timeout_ms: None,
+                env: std::collections::HashMap::new(),
+                with_escalated_permissions: None,
+                justification: None,
+            },
+            approval_command: vec!["apply_patch".into()],
+            mode: ExecutionMode::ApplyPatch(exec),
+            stdout_stream: None,
+            use_shell_profile: false,
+        };
+        let otel_event_manager = ctx.client.get_otel_event_manager();
+        let decision = select_sandbox(
+            &request,
+            AskForApproval::OnRequest,
+            Default::default(),
+            &cfg,
+            &session,
+            "sub",
+            "call",
+            &otel_event_manager,
+        )
+        .await
+        .expect("ok");
+        // Explicit user override runs without sandbox
+        assert_eq!(decision.initial_sandbox, SandboxType::None);
+        assert_eq!(decision.escalate_on_failure, false);
+    }
+
+    #[tokio::test]
+    async fn select_apply_patch_autoapprove_in_danger() {
+        let (session, ctx) = make_session_and_context();
+        let tmp = tempfile::tempdir().expect("tmp");
+        let p = tmp.path().join("a.txt");
+        let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
+        let exec = ApplyPatchExec {
+            action,
+            user_explicitly_approved_this_action: false,
+        };
+        let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
+        let request = ExecutionRequest {
+            params: ExecParams {
+                command: vec!["apply_patch".into()],
+                cwd: std::env::temp_dir(),
+                timeout_ms: None,
+                env: std::collections::HashMap::new(),
+                with_escalated_permissions: None,
+                justification: None,
+            },
+            approval_command: vec!["apply_patch".into()],
+            mode: ExecutionMode::ApplyPatch(exec),
+            stdout_stream: None,
+            use_shell_profile: false,
+        };
+        let otel_event_manager = ctx.client.get_otel_event_manager();
+        let decision = select_sandbox(
+            &request,
+            AskForApproval::OnRequest,
+            Default::default(),
+            &cfg,
+            &session,
+            "sub",
+            "call",
+            &otel_event_manager,
+        )
+        .await
+        .expect("ok");
+        // On platforms with a sandbox, DangerFullAccess still prefers it
+        let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None);
+        assert_eq!(decision.initial_sandbox, expected);
+        assert_eq!(decision.escalate_on_failure, false);
+    }
+
+    /// With `AskForApproval::UnlessTrusted` and a read-only sandbox policy, an
+    /// `apply_patch` request that the user has not explicitly approved is expected
+    /// to be rejected by `select_sandbox` with a model-facing error whose text
+    /// contains "requires approval".
+    #[tokio::test]
+    async fn select_apply_patch_requires_approval_on_unless_trusted() {
+        let (session, ctx) = make_session_and_context();
+
+        // Patch action built by `new_add_for_test` for `a.txt` inside a throwaway
+        // temp directory; the directory guard stays alive until the test ends.
+        let scratch_dir = tempfile::tempdir().expect("tmpdir");
+        let target_path = scratch_dir.path().join("a.txt");
+        let patch_exec = ApplyPatchExec {
+            action: ApplyPatchAction::new_add_for_test(&target_path, "hello".to_string()),
+            user_explicitly_approved_this_action: false,
+        };
+
+        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
+        let params = ExecParams {
+            command: vec!["apply_patch".into()],
+            cwd: std::env::temp_dir(),
+            timeout_ms: None,
+            env: std::collections::HashMap::new(),
+            with_escalated_permissions: None,
+            justification: None,
+        };
+        let request = ExecutionRequest {
+            params,
+            approval_command: vec!["apply_patch".into()],
+            mode: ExecutionMode::ApplyPatch(patch_exec),
+            stdout_stream: None,
+            use_shell_profile: false,
+        };
+        let otel_event_manager = ctx.client.get_otel_event_manager();
+
+        let outcome = select_sandbox(
+            &request,
+            AskForApproval::UnlessTrusted,
+            Default::default(),
+            &cfg,
+            &session,
+            "sub",
+            "call",
+            &otel_event_manager,
+        )
+        .await;
+
+        // Only a `RespondToModel` function error is acceptable; anything else
+        // (including success) fails the test.
+        let message = match outcome {
+            Err(ExecError::Function(FunctionCallError::RespondToModel(msg))) => msg,
+            Ok(_) => panic!("expected error"),
+            Err(other) => panic!("unexpected error: {other:?}"),
+        };
+        assert!(message.contains("requires approval"));
+    }
+
+    /// Under `SandboxPolicy::DangerFullAccess` with `AskForApproval::OnRequest`, a
+    /// shell request for an unrecognized command is expected to be selected with
+    /// `SandboxType::None` and without escalation on failure.
+    #[tokio::test]
+    async fn select_shell_autoapprove_in_danger_mode() {
+        let (session, ctx) = make_session_and_context();
+        let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
+        let request = ExecutionRequest {
+            params: ExecParams {
+                command: vec!["some-unknown".into()],
+                cwd: std::env::temp_dir(),
+                timeout_ms: None,
+                env: std::collections::HashMap::new(),
+                with_escalated_permissions: None,
+                justification: None,
+            },
+            approval_command: vec!["some-unknown".into()],
+            mode: ExecutionMode::Shell,
+            stdout_stream: None,
+            use_shell_profile: false,
+        };
+        let otel_event_manager = ctx.client.get_otel_event_manager();
+        let decision = select_sandbox(
+            &request,
+            AskForApproval::OnRequest,
+            Default::default(),
+            &cfg,
+            &session,
+            "sub",
+            "call",
+            &otel_event_manager,
+        )
+        .await
+        .expect("ok");
+        assert_eq!(decision.initial_sandbox, SandboxType::None);
+        // Assert the boolean directly instead of comparing it against a literal.
+        assert!(!decision.escalate_on_failure);
+    }
+
+    /// Under `SandboxPolicy::ReadOnly` with `AskForApproval::OnFailure`, a shell
+    /// request for an unrecognized command is expected to start inside a platform
+    /// sandbox (anything other than `SandboxType::None`) and to be marked for
+    /// escalation on failure. Compiled only on macOS and Linux.
+    #[cfg(any(target_os = "macos", target_os = "linux"))]
+    #[tokio::test]
+    async fn select_shell_escalates_on_failure_with_platform_sandbox() {
+        let (session, ctx) = make_session_and_context();
+        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
+        let request = ExecutionRequest {
+            params: ExecParams {
+                // Unknown command => untrusted but not flagged dangerous
+                command: vec!["some-unknown".into()],
+                cwd: std::env::temp_dir(),
+                timeout_ms: None,
+                env: std::collections::HashMap::new(),
+                with_escalated_permissions: None,
+                justification: None,
+            },
+            approval_command: vec!["some-unknown".into()],
+            mode: ExecutionMode::Shell,
+            stdout_stream: None,
+            use_shell_profile: false,
+        };
+        let otel_event_manager = ctx.client.get_otel_event_manager();
+        let decision = select_sandbox(
+            &request,
+            AskForApproval::OnFailure,
+            Default::default(),
+            &cfg,
+            &session,
+            "sub",
+            "call",
+            &otel_event_manager,
+        )
+        .await
+        .expect("ok");
+        // On macOS/Linux we should have a platform sandbox and escalate on failure
+        assert_ne!(decision.initial_sandbox, SandboxType::None);
+        // Assert the boolean directly instead of comparing it against a literal.
+        assert!(decision.escalate_on_failure);
+    }
+}