chore: sandbox refactor 2 (#4653)

Revert the revert and fix the UI issue
2025-10-03 11:17:39 +01:00
parent 69ac5153d4
commit 69cb72f842
14 changed files with 1437 additions and 414 deletions
--- a/codex-rs/app-server/tests/common/mcp_process.rs
+++ b/codex-rs/app-server/tests/common/mcp_process.rs
@@ -1,3 +1,4 @@
 use std::collections::VecDeque;
 use std::path::Path;
 use std::process::Stdio;
 use std::sync::atomic::AtomicI64;
@@ -47,6 +48,7 @@ pub struct McpProcess {
    process: Child,
    stdin: ChildStdin,
    stdout: BufReader<ChildStdout>,
    pending_user_messages: VecDeque<JSONRPCNotification>,
 }
 impl McpProcess {
@@ -117,6 +119,7 @@ impl McpProcess {
            process,
            stdin,
            stdout,
            pending_user_messages: VecDeque::new(),
        })
    }
@@ -375,8 +378,9 @@ impl McpProcess {
            let message = self.read_jsonrpc_message().await?;
            match message {
-                JSONRPCMessage::Notification(_) => {
+                JSONRPCMessage::Notification(notification) => {
-                    eprintln!("notification: {message:?}");
+                    eprintln!("notification: {notification:?}");
                    self.enqueue_user_message(notification);
                }
                JSONRPCMessage::Request(jsonrpc_request) => {
                    return jsonrpc_request.try_into().with_context(
@@ -402,8 +406,9 @@ impl McpProcess {
        loop {
            let message = self.read_jsonrpc_message().await?;
            match message {
-                JSONRPCMessage::Notification(_) => {
+                JSONRPCMessage::Notification(notification) => {
-                    eprintln!("notification: {message:?}");
+                    eprintln!("notification: {notification:?}");
                    self.enqueue_user_message(notification);
                }
                JSONRPCMessage::Request(_) => {
                    anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
@@ -427,8 +432,9 @@ impl McpProcess {
        loop {
            let message = self.read_jsonrpc_message().await?;
            match message {
-                JSONRPCMessage::Notification(_) => {
+                JSONRPCMessage::Notification(notification) => {
-                    eprintln!("notification: {message:?}");
+                    eprintln!("notification: {notification:?}");
                    self.enqueue_user_message(notification);
                }
                JSONRPCMessage::Request(_) => {
                    anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
@@ -451,6 +457,10 @@ impl McpProcess {
    ) -> anyhow::Result<JSONRPCNotification> {
        eprintln!("in read_stream_until_notification_message({method})");
        if let Some(notification) = self.take_pending_notification_by_method(method) {
            return Ok(notification);
        }
        loop {
            let message = self.read_jsonrpc_message().await?;
            match message {
@@ -458,6 +468,7 @@ impl McpProcess {
                    if notification.method == method {
                        return Ok(notification);
                    }
                    self.enqueue_user_message(notification);
                }
                JSONRPCMessage::Request(_) => {
                    anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
@@ -471,4 +482,21 @@ impl McpProcess {
            }
        }
    }
    fn take_pending_notification_by_method(&mut self, method: &str) -> Option<JSONRPCNotification> {
        if let Some(pos) = self
            .pending_user_messages
            .iter()
            .position(|notification| notification.method == method)
        {
            return self.pending_user_messages.remove(pos);
        }
        None
    }
    fn enqueue_user_message(&mut self, notification: JSONRPCNotification) {
        if notification.method == "codex/event/user_message" {
            self.pending_user_messages.push_back(notification);
        }
    }
 }
--- a/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
+++ b/codex-rs/app-server/tests/suite/codex_message_processor_flow.rs
@@ -8,6 +8,7 @@ use app_test_support::to_response;
 use codex_app_server_protocol::AddConversationListenerParams;
 use codex_app_server_protocol::AddConversationSubscriptionResponse;
 use codex_app_server_protocol::ExecCommandApprovalParams;
 use codex_app_server_protocol::InputItem;
 use codex_app_server_protocol::JSONRPCNotification;
 use codex_app_server_protocol::JSONRPCResponse;
 use codex_app_server_protocol::NewConversationParams;
@@ -25,6 +26,10 @@ use codex_core::protocol::SandboxPolicy;
 use codex_core::protocol_config_types::ReasoningEffort;
 use codex_core::protocol_config_types::ReasoningSummary;
 use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
 use codex_protocol::config_types::SandboxMode;
 use codex_protocol::protocol::Event;
 use codex_protocol::protocol::EventMsg;
 use codex_protocol::protocol::InputMessageKind;
 use pretty_assertions::assert_eq;
 use std::env;
 use tempfile::TempDir;
@@ -367,6 +372,234 @@ async fn test_send_user_turn_changes_approval_policy_behavior() {
 }
 // Helper: minimal config.toml pointing at mock provider.
 #[tokio::test(flavor = "multi_thread", worker_threads = 4)]
 async fn test_send_user_turn_updates_sandbox_and_cwd_between_turns() {
    if env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
        println!(
            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
        );
        return;
    }
    let tmp = TempDir::new().expect("tmp dir");
    let codex_home = tmp.path().join("codex_home");
    std::fs::create_dir(&codex_home).expect("create codex home dir");
    let workspace_root = tmp.path().join("workspace");
    std::fs::create_dir(&workspace_root).expect("create workspace root");
    let first_cwd = workspace_root.join("turn1");
    let second_cwd = workspace_root.join("turn2");
    std::fs::create_dir(&first_cwd).expect("create first cwd");
    std::fs::create_dir(&second_cwd).expect("create second cwd");
    let responses = vec![
        create_shell_sse_response(
            vec![
                "bash".to_string(),
                "-lc".to_string(),
                "echo first turn".to_string(),
            ],
            None,
            Some(5000),
            "call-first",
        )
        .expect("create first shell response"),
        create_final_assistant_message_sse_response("done first")
            .expect("create first final assistant message"),
        create_shell_sse_response(
            vec![
                "bash".to_string(),
                "-lc".to_string(),
                "echo second turn".to_string(),
            ],
            None,
            Some(5000),
            "call-second",
        )
        .expect("create second shell response"),
        create_final_assistant_message_sse_response("done second")
            .expect("create second final assistant message"),
    ];
    let server = create_mock_chat_completions_server(responses).await;
    create_config_toml(&codex_home, &server.uri()).expect("write config");
    let mut mcp = McpProcess::new(&codex_home)
        .await
        .expect("spawn mcp process");
    timeout(DEFAULT_READ_TIMEOUT, mcp.initialize())
        .await
        .expect("init timeout")
        .expect("init failed");
    let new_conv_id = mcp
        .send_new_conversation_request(NewConversationParams {
            cwd: Some(first_cwd.to_string_lossy().into_owned()),
            approval_policy: Some(AskForApproval::Never),
            sandbox: Some(SandboxMode::WorkspaceWrite),
            ..Default::default()
        })
        .await
        .expect("send newConversation");
    let new_conv_resp: JSONRPCResponse = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(new_conv_id)),
    )
    .await
    .expect("newConversation timeout")
    .expect("newConversation resp");
    let NewConversationResponse {
        conversation_id,
        model,
        ..
    } = to_response::<NewConversationResponse>(new_conv_resp)
        .expect("deserialize newConversation response");
    let add_listener_id = mcp
        .send_add_conversation_listener_request(AddConversationListenerParams { conversation_id })
        .await
        .expect("send addConversationListener");
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(add_listener_id)),
    )
    .await
    .expect("addConversationListener timeout")
    .expect("addConversationListener resp");
    let first_turn_id = mcp
        .send_send_user_turn_request(SendUserTurnParams {
            conversation_id,
            items: vec![InputItem::Text {
                text: "first turn".to_string(),
            }],
            cwd: first_cwd.clone(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::WorkspaceWrite {
                writable_roots: vec![first_cwd.clone()],
                network_access: false,
                exclude_tmpdir_env_var: false,
                exclude_slash_tmp: false,
            },
            model: model.clone(),
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
        })
        .await
        .expect("send first sendUserTurn");
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(first_turn_id)),
    )
    .await
    .expect("sendUserTurn 1 timeout")
    .expect("sendUserTurn 1 resp");
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
    .await
    .expect("task_complete 1 timeout")
    .expect("task_complete 1 notification");
    let second_turn_id = mcp
        .send_send_user_turn_request(SendUserTurnParams {
            conversation_id,
            items: vec![InputItem::Text {
                text: "second turn".to_string(),
            }],
            cwd: second_cwd.clone(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: model.clone(),
            effort: Some(ReasoningEffort::Medium),
            summary: ReasoningSummary::Auto,
        })
        .await
        .expect("send second sendUserTurn");
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_response_message(RequestId::Integer(second_turn_id)),
    )
    .await
    .expect("sendUserTurn 2 timeout")
    .expect("sendUserTurn 2 resp");
    let mut env_message: Option<String> = None;
    let second_cwd_str = second_cwd.to_string_lossy().into_owned();
    for _ in 0..10 {
        let notification = timeout(
            DEFAULT_READ_TIMEOUT,
            mcp.read_stream_until_notification_message("codex/event/user_message"),
        )
        .await
        .expect("user_message timeout")
        .expect("user_message notification");
        let params = notification
            .params
            .clone()
            .expect("user_message should include params");
        let event: Event = serde_json::from_value(params).expect("deserialize user_message event");
        if let EventMsg::UserMessage(user) = event.msg
            && matches!(user.kind, Some(InputMessageKind::EnvironmentContext))
            && user.message.contains(&second_cwd_str)
        {
            env_message = Some(user.message);
            break;
        }
    }
    let env_message = env_message.expect("expected environment context update");
    assert!(
        env_message.contains("<sandbox_mode>danger-full-access</sandbox_mode>"),
        "env context should reflect new sandbox mode: {env_message}"
    );
    assert!(
        env_message.contains("<network_access>enabled</network_access>"),
        "env context should enable network access for danger-full-access policy: {env_message}"
    );
    assert!(
        env_message.contains(&second_cwd_str),
        "env context should include updated cwd: {env_message}"
    );
    let exec_begin_notification = timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/exec_command_begin"),
    )
    .await
    .expect("exec_command_begin timeout")
    .expect("exec_command_begin notification");
    let params = exec_begin_notification
        .params
        .clone()
        .expect("exec_command_begin params");
    let event: Event = serde_json::from_value(params).expect("deserialize exec begin event");
    let exec_begin = match event.msg {
        EventMsg::ExecCommandBegin(exec_begin) => exec_begin,
        other => panic!("expected ExecCommandBegin event, got {other:?}"),
    };
    assert_eq!(
        exec_begin.cwd, second_cwd,
        "exec turn should run from updated cwd"
    );
    assert_eq!(
        exec_begin.command,
        vec![
            "bash".to_string(),
            "-lc".to_string(),
            "echo second turn".to_string()
        ],
        "exec turn should run expected command"
    );
    timeout(
        DEFAULT_READ_TIMEOUT,
        mcp.read_stream_until_notification_message("codex/event/task_complete"),
    )
    .await
    .expect("task_complete 2 timeout")
    .expect("task_complete 2 notification");
 }
 fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
    let config_toml = codex_home.join("config.toml");
    std::fs::write(
--- a/codex-rs/core/src/apply_patch.rs
+++ b/codex-rs/core/src/apply_patch.rs
@@ -27,6 +27,7 @@ pub(crate) enum InternalApplyPatchInvocation {
    DelegateToExec(ApplyPatchExec),
 }
 #[derive(Debug)]
 pub(crate) struct ApplyPatchExec {
    pub(crate) action: ApplyPatchAction,
    pub(crate) user_explicitly_approved_this_action: bool,
@@ -109,3 +110,28 @@ pub(crate) fn convert_apply_patch_to_protocol(
    }
    result
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    use tempfile::tempdir;
    #[test]
    fn convert_apply_patch_maps_add_variant() {
        let tmp = tempdir().expect("tmp");
        let p = tmp.path().join("a.txt");
        // Create an action with a single Add change
        let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
        let got = convert_apply_patch_to_protocol(&action);
        assert_eq!(
            got.get(&p),
            Some(&FileChange::Add {
                content: "hello".to_string()
            })
        );
    }
 }
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -1,11 +1,9 @@
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fmt::Debug;
 use std::path::Path;
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::sync::atomic::AtomicU64;
 use std::time::Duration;
 use crate::AuthManager;
 use crate::client_common::REVIEW_PROMPT;
@@ -45,7 +43,6 @@ use tracing::warn;
 use crate::ModelProviderInfo;
 use crate::apply_patch;
 use crate::apply_patch::ApplyPatchExec;
 use crate::apply_patch::CODEX_APPLY_PATCH_ARG1;
 use crate::apply_patch::InternalApplyPatchInvocation;
 use crate::apply_patch::convert_apply_patch_to_protocol;
 use crate::client::ModelClient;
@@ -58,19 +55,21 @@ use crate::environment_context::EnvironmentContext;
 use crate::error::CodexErr;
 use crate::error::Result as CodexResult;
 use crate::error::SandboxErr;
 use crate::error::get_error_message_ui;
 use crate::exec::ExecParams;
 use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::StdoutStream;
 #[cfg(test)]
 use crate::exec::StreamOutput;
 use crate::exec::process_exec_tool_call;
 use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
 use crate::exec_command::ExecCommandParams;
 use crate::exec_command::ExecSessionManager;
 use crate::exec_command::WRITE_STDIN_TOOL_NAME;
 use crate::exec_command::WriteStdinParams;
 use crate::exec_env::create_env;
 use crate::executor::ExecutionMode;
 use crate::executor::Executor;
 use crate::executor::ExecutorConfig;
 use crate::executor::normalize_exec_result;
 use crate::mcp_connection_manager::McpConnectionManager;
 use crate::mcp_tool_call::handle_mcp_tool_call;
 use crate::model_family::find_family_for_model;
@@ -115,9 +114,6 @@ use crate::protocol::ViewImageToolCallEvent;
 use crate::protocol::WebSearchBeginEvent;
 use crate::rollout::RolloutRecorder;
 use crate::rollout::RolloutRecorderParams;
 use crate::safety::SafetyCheck;
 use crate::safety::assess_command_safety;
 use crate::safety::assess_safety_for_untrusted_command;
 use crate::shell;
 use crate::state::ActiveTurn;
 use crate::state::SessionServices;
@@ -130,7 +126,6 @@ use crate::user_instructions::UserInstructions;
 use crate::user_notification::UserNotification;
 use crate::util::backoff;
 use codex_otel::otel_event_manager::OtelEventManager;
 use codex_otel::otel_event_manager::ToolDecisionSource;
 use codex_protocol::config_types::ReasoningEffort as ReasoningEffortConfig;
 use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
 use codex_protocol::custom_prompts::CustomPrompt;
@@ -495,9 +490,13 @@ impl Session {
            unified_exec_manager: UnifiedExecSessionManager::default(),
            notifier: notify,
            rollout: Mutex::new(Some(rollout_recorder)),
            codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
            user_shell: default_shell,
            show_raw_agent_reasoning: config.show_raw_agent_reasoning,
            executor: Executor::new(ExecutorConfig::new(
                turn_context.sandbox_policy.clone(),
                turn_context.cwd.clone(),
                config.codex_linux_sandbox_exe.clone(),
            )),
        };
        let sess = Arc::new(Session {
@@ -582,6 +581,11 @@ impl Session {
        }
    }
    /// Emit an exec approval request event and await the user's decision.
    ///
    /// The request is keyed by `sub_id`/`call_id` so matching responses are delivered
    /// to the correct in-flight turn. If the task is aborted, this returns the
    /// default `ReviewDecision` (`Denied`).
    pub async fn request_command_approval(
        &self,
        sub_id: String,
@@ -679,11 +683,6 @@ impl Session {
        }
    }
    pub async fn add_approved_command(&self, cmd: Vec<String>) {
        let mut state = self.state.lock().await;
        state.add_approved_command(cmd);
    }
    /// Records input items: always append to conversation history and
    /// persist these response items to rollout.
    async fn record_conversation_items(&self, items: &[ResponseItem]) {
@@ -841,6 +840,7 @@ impl Session {
            command_for_display,
            cwd,
            apply_patch,
            ..
        } = exec_command_context;
        let msg = match apply_patch {
            Some(ApplyPatchCommandContext {
@@ -937,45 +937,29 @@ impl Session {
    /// command even on error.
    ///
    /// Returns the output of the exec tool call.
-    async fn run_exec_with_events<'a>(
+    async fn run_exec_with_events(
        &self,
        turn_diff_tracker: &mut TurnDiffTracker,
-        begin_ctx: ExecCommandContext,
+        prepared: PreparedExec,
-        exec_args: ExecInvokeArgs<'a>,
+        approval_policy: AskForApproval,
-    ) -> crate::error::Result<ExecToolCallOutput> {
+    ) -> Result<ExecToolCallOutput, ExecError> {
-        let is_apply_patch = begin_ctx.apply_patch.is_some();
+        let PreparedExec { context, request } = prepared;
-        let sub_id = begin_ctx.sub_id.clone();
+        let is_apply_patch = context.apply_patch.is_some();
-        let call_id = begin_ctx.call_id.clone();
+        let sub_id = context.sub_id.clone();
        let call_id = context.call_id.clone();
-        self.on_exec_command_begin(turn_diff_tracker, begin_ctx.clone())
+        self.on_exec_command_begin(turn_diff_tracker, context.clone())
            .await;
-        let result = process_exec_tool_call(
+        let result = self
-            exec_args.params,
+            .services
-            exec_args.sandbox_type,
+            .executor
-            exec_args.sandbox_policy,
+            .run(request, self, approval_policy, &context)
-            exec_args.sandbox_cwd,
+            .await;
-            exec_args.codex_linux_sandbox_exe,
+
-            exec_args.stdout_stream,
+        let normalized = normalize_exec_result(&result);
-        )
+        let borrowed = normalized.event_output();
        .await;
        let output_stderr;
        let borrowed: &ExecToolCallOutput = match &result {
            Ok(output) => output,
            Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output,
            Err(e) => {
                output_stderr = ExecToolCallOutput {
                    exit_code: -1,
                    stdout: StreamOutput::new(String::new()),
                    stderr: StreamOutput::new(get_error_message_ui(e)),
                    aggregated_output: StreamOutput::new(get_error_message_ui(e)),
                    duration: Duration::default(),
                    timed_out: false,
                };
                &output_stderr
            }
        };
        self.on_exec_command_end(
            turn_diff_tracker,
            &sub_id,
@@ -985,13 +969,15 @@ impl Session {
        )
        .await;
        drop(normalized);
        result
    }
    /// Helper that emits a BackgroundEvent with the given message. This keeps
    /// the call‑sites terse so adding more diagnostics does not clutter the
    /// core agent logic.
-    async fn notify_background_event(&self, sub_id: &str, message: impl Into<String>) {
+    pub(crate) async fn notify_background_event(&self, sub_id: &str, message: impl Into<String>) {
        let event = Event {
            id: sub_id.to_string(),
            msg: EventMsg::BackgroundEvent(BackgroundEventEvent {
@@ -1079,7 +1065,7 @@ impl Session {
        &self.services.notifier
    }
-    fn user_shell(&self) -> &shell::Shell {
+    pub(crate) fn user_shell(&self) -> &shell::Shell {
        &self.services.user_shell
    }
@@ -1101,6 +1087,8 @@ pub(crate) struct ExecCommandContext {
    pub(crate) command_for_display: Vec<String>,
    pub(crate) cwd: PathBuf,
    pub(crate) apply_patch: Option<ApplyPatchCommandContext>,
    pub(crate) tool_name: String,
    pub(crate) otel_event_manager: OtelEventManager,
 }
 #[derive(Clone, Debug)]
@@ -1307,8 +1295,19 @@ async fn submission_loop(
                    let previous_env_context = EnvironmentContext::from(turn_context.as_ref());
                    let new_env_context = EnvironmentContext::from(&fresh_turn_context);
                    if !new_env_context.equals_except_shell(&previous_env_context) {
-                        sess.record_conversation_items(&[ResponseItem::from(new_env_context)])
+                        let env_response_item = ResponseItem::from(new_env_context);
                        sess.record_conversation_items(std::slice::from_ref(&env_response_item))
                            .await;
                        for msg in map_response_item_to_event_messages(
                            &env_response_item,
                            sess.show_raw_agent_reasoning(),
                        ) {
                            let event = Event {
                                id: sub.id.clone(),
                                msg,
                            };
                            sess.send_event(event).await;
                        }
                    }
                    // Install the new persistent context for subsequent tasks/turns.
@@ -2627,33 +2626,6 @@ fn parse_container_exec_arguments(
        })
 }
 pub struct ExecInvokeArgs<'a> {
    pub params: ExecParams,
    pub sandbox_type: SandboxType,
    pub sandbox_policy: &'a SandboxPolicy,
    pub sandbox_cwd: &'a Path,
    pub codex_linux_sandbox_exe: &'a Option<PathBuf>,
    pub stdout_stream: Option<StdoutStream>,
 }
 fn maybe_translate_shell_command(
    params: ExecParams,
    sess: &Session,
    turn_context: &TurnContext,
 ) -> ExecParams {
    let should_translate = matches!(sess.user_shell(), crate::shell::Shell::PowerShell(_))
        || turn_context.shell_environment_policy.use_profile;
    if should_translate
        && let Some(command) = sess
            .user_shell()
            .format_default_shell_invocation(params.command.clone())
    {
        return ExecParams { command, ..params };
    }
    params
 }
 async fn handle_container_exec_with_params(
    tool_name: &str,
    params: ExecParams,
@@ -2699,152 +2671,10 @@ async fn handle_container_exec_with_params(
        MaybeApplyPatchVerified::NotApplyPatch => None,
    };
-    let (params, safety, command_for_display) = match &apply_patch_exec {
+    let command_for_display = if let Some(exec) = apply_patch_exec.as_ref() {
-        Some(ApplyPatchExec {
+        vec!["apply_patch".to_string(), exec.action.patch.clone()]
-            action: ApplyPatchAction { patch, cwd, .. },
+    } else {
-            user_explicitly_approved_this_action,
+        params.command.clone()
        }) => {
            let path_to_codex = std::env::current_exe()
                .ok()
                .map(|p| p.to_string_lossy().to_string());
            let Some(path_to_codex) = path_to_codex else {
                return Err(FunctionCallError::RespondToModel(
                    "failed to determine path to codex executable".to_string(),
                ));
            };
            let params = ExecParams {
                command: vec![
                    path_to_codex,
                    CODEX_APPLY_PATCH_ARG1.to_string(),
                    patch.clone(),
                ],
                cwd: cwd.clone(),
                timeout_ms: params.timeout_ms,
                env: HashMap::new(),
                with_escalated_permissions: params.with_escalated_permissions,
                justification: params.justification.clone(),
            };
            let safety = if *user_explicitly_approved_this_action {
                SafetyCheck::AutoApprove {
                    sandbox_type: SandboxType::None,
                    user_explicitly_approved: true,
                }
            } else {
                assess_safety_for_untrusted_command(
                    turn_context.approval_policy,
                    &turn_context.sandbox_policy,
                    params.with_escalated_permissions.unwrap_or(false),
                )
            };
            (
                params,
                safety,
                vec!["apply_patch".to_string(), patch.clone()],
            )
        }
        None => {
            let safety = {
                let state = sess.state.lock().await;
                assess_command_safety(
                    &params.command,
                    turn_context.approval_policy,
                    &turn_context.sandbox_policy,
                    state.approved_commands_ref(),
                    params.with_escalated_permissions.unwrap_or(false),
                )
            };
            let command_for_display = params.command.clone();
            (params, safety, command_for_display)
        }
    };
    let sandbox_type = match safety {
        SafetyCheck::AutoApprove {
            sandbox_type,
            user_explicitly_approved,
        } => {
            otel_event_manager.tool_decision(
                tool_name,
                call_id.as_str(),
                ReviewDecision::Approved,
                if user_explicitly_approved {
                    ToolDecisionSource::User
                } else {
                    ToolDecisionSource::Config
                },
            );
            sandbox_type
        }
        SafetyCheck::AskUser => {
            let decision = sess
                .request_command_approval(
                    sub_id.clone(),
                    call_id.clone(),
                    params.command.clone(),
                    params.cwd.clone(),
                    params.justification.clone(),
                )
                .await;
            match decision {
                ReviewDecision::Approved => {
                    otel_event_manager.tool_decision(
                        tool_name,
                        call_id.as_str(),
                        ReviewDecision::Approved,
                        ToolDecisionSource::User,
                    );
                }
                ReviewDecision::ApprovedForSession => {
                    otel_event_manager.tool_decision(
                        tool_name,
                        call_id.as_str(),
                        ReviewDecision::ApprovedForSession,
                        ToolDecisionSource::User,
                    );
                    sess.add_approved_command(params.command.clone()).await;
                }
                ReviewDecision::Denied => {
                    otel_event_manager.tool_decision(
                        tool_name,
                        call_id.as_str(),
                        ReviewDecision::Denied,
                        ToolDecisionSource::User,
                    );
                    return Err(FunctionCallError::RespondToModel(
                        "exec command rejected by user".to_string(),
                    ));
                }
                ReviewDecision::Abort => {
                    otel_event_manager.tool_decision(
                        tool_name,
                        call_id.as_str(),
                        ReviewDecision::Abort,
                        ToolDecisionSource::User,
                    );
                    return Err(FunctionCallError::RespondToModel(
                        "exec command aborted by user".to_string(),
                    ));
                }
            }
            // No sandboxing is applied because the user has given
            // explicit approval. Often, we end up in this case because
            // the command cannot be run in a sandbox, such as
            // installing a new dependency that requires network access.
            SandboxType::None
        }
        SafetyCheck::Reject { reason } => {
            otel_event_manager.tool_decision(
                tool_name,
                call_id.as_str(),
                ReviewDecision::Denied,
                ToolDecisionSource::Config,
            );
            return Err(FunctionCallError::RespondToModel(format!(
                "exec command rejected: {reason:?}"
            )));
        }
    };
    let exec_command_context = ExecCommandContext {
@@ -2852,38 +2682,47 @@ async fn handle_container_exec_with_params(
        call_id: call_id.clone(),
        command_for_display: command_for_display.clone(),
        cwd: params.cwd.clone(),
-        apply_patch: apply_patch_exec.map(
+        apply_patch: apply_patch_exec.as_ref().map(
            |ApplyPatchExec {
                 action,
                 user_explicitly_approved_this_action,
             }| ApplyPatchCommandContext {
-                user_explicitly_approved_this_action,
+                user_explicitly_approved_this_action: *user_explicitly_approved_this_action,
-                changes: convert_apply_patch_to_protocol(&action),
+                changes: convert_apply_patch_to_protocol(action),
            },
        ),
        tool_name: tool_name.to_string(),
        otel_event_manager,
    };
-    let params = maybe_translate_shell_command(params, sess, turn_context);
+    let mode = match apply_patch_exec {
        Some(exec) => ExecutionMode::ApplyPatch(exec),
        None => ExecutionMode::Shell,
    };
    sess.services.executor.update_environment(
        turn_context.sandbox_policy.clone(),
        turn_context.cwd.clone(),
    );
    let prepared_exec = PreparedExec::new(
        exec_command_context,
        params,
        command_for_display,
        mode,
        Some(StdoutStream {
            sub_id: sub_id.clone(),
            call_id: call_id.clone(),
            tx_event: sess.tx_event.clone(),
        }),
        turn_context.shell_environment_policy.use_profile,
    );
    let output_result = sess
        .run_exec_with_events(
            turn_diff_tracker,
-            exec_command_context.clone(),
+            prepared_exec,
-            ExecInvokeArgs {
+            turn_context.approval_policy,
                params: params.clone(),
                sandbox_type,
                sandbox_policy: &turn_context.sandbox_policy,
                sandbox_cwd: &turn_context.cwd,
                codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe,
                stdout_stream: if exec_command_context.apply_patch.is_some() {
                    None
                } else {
                    Some(StdoutStream {
                        sub_id: sub_id.clone(),
                        call_id: call_id.clone(),
                        tx_event: sess.tx_event.clone(),
                    })
                },
            },
        )
        .await;
@@ -2897,154 +2736,16 @@ async fn handle_container_exec_with_params(
                Err(FunctionCallError::RespondToModel(content))
            }
        }
-        Err(CodexErr::Sandbox(error)) => {
+        Err(ExecError::Function(err)) => Err(err),
-            handle_sandbox_error(
+        Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err(
-                tool_name,
+            FunctionCallError::RespondToModel(format_exec_output(&output)),
-                turn_diff_tracker,
+        ),
-                params,
+        Err(ExecError::Codex(err)) => Err(FunctionCallError::RespondToModel(format!(
-                exec_command_context,
+            "execution error: {err:?}"
                error,
                sandbox_type,
                sess,
                turn_context,
                &otel_event_manager,
            )
            .await
        }
        Err(e) => Err(FunctionCallError::RespondToModel(format!(
            "execution error: {e:?}"
        ))),
    }
 }
 #[allow(clippy::too_many_arguments)]
 async fn handle_sandbox_error(
    tool_name: &str,
    turn_diff_tracker: &mut TurnDiffTracker,
    params: ExecParams,
    exec_command_context: ExecCommandContext,
    error: SandboxErr,
    sandbox_type: SandboxType,
    sess: &Session,
    turn_context: &TurnContext,
    otel_event_manager: &OtelEventManager,
 ) -> Result<String, FunctionCallError> {
    let call_id = exec_command_context.call_id.clone();
    let sub_id = exec_command_context.sub_id.clone();
    let cwd = exec_command_context.cwd.clone();
    if let SandboxErr::Timeout { output } = &error {
        let content = format_exec_output(output);
        return Err(FunctionCallError::RespondToModel(content));
    }
    // Early out if either the user never wants to be asked for approval, or
    // we're letting the model manage escalation requests. Otherwise, continue
    match turn_context.approval_policy {
        AskForApproval::Never | AskForApproval::OnRequest => {
            return Err(FunctionCallError::RespondToModel(format!(
                "failed in sandbox {sandbox_type:?} with execution error: {error:?}"
            )));
        }
        AskForApproval::UnlessTrusted | AskForApproval::OnFailure => (),
    }
    // Note that when `error` is `SandboxErr::Denied`, it could be a false
    // positive. That is, it may have exited with a non-zero exit code, not
    // because the sandbox denied it, but because that is its expected behavior,
    // i.e., a grep command that did not match anything. Ideally we would
    // include additional metadata on the command to indicate whether non-zero
    // exit codes merit a retry.
    // For now, we categorically ask the user to retry without sandbox and
    // emit the raw error as a background event.
    sess.notify_background_event(&sub_id, format!("Execution failed: {error}"))
        .await;
    let decision = sess
        .request_command_approval(
            sub_id.clone(),
            call_id.clone(),
            params.command.clone(),
            cwd.clone(),
            Some("command failed; retry without sandbox?".to_string()),
        )
        .await;
    match decision {
        ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
            // Persist this command as pre‑approved for the
            // remainder of the session so future
            // executions skip the sandbox directly.
            // TODO(ragona): Isn't this a bug? It always saves the command in an | fork?
            sess.add_approved_command(params.command.clone()).await;
            // Inform UI we are retrying without sandbox.
            sess.notify_background_event(&sub_id, "retrying command without sandbox")
                .await;
            otel_event_manager.tool_decision(
                tool_name,
                call_id.as_str(),
                decision,
                ToolDecisionSource::User,
            );
            // This is an escalated retry; the policy will not be
            // examined and the sandbox has been set to `None`.
            let retry_output_result = sess
                .run_exec_with_events(
                    turn_diff_tracker,
                    exec_command_context.clone(),
                    ExecInvokeArgs {
                        params,
                        sandbox_type: SandboxType::None,
                        sandbox_policy: &turn_context.sandbox_policy,
                        sandbox_cwd: &turn_context.cwd,
                        codex_linux_sandbox_exe: &sess.services.codex_linux_sandbox_exe,
                        stdout_stream: if exec_command_context.apply_patch.is_some() {
                            None
                        } else {
                            Some(StdoutStream {
                                sub_id: sub_id.clone(),
                                call_id: call_id.clone(),
                                tx_event: sess.tx_event.clone(),
                            })
                        },
                    },
                )
                .await;
            match retry_output_result {
                Ok(retry_output) => {
                    let ExecToolCallOutput { exit_code, .. } = &retry_output;
                    let content = format_exec_output(&retry_output);
                    if *exit_code == 0 {
                        Ok(content)
                    } else {
                        Err(FunctionCallError::RespondToModel(content))
                    }
                }
                Err(e) => Err(FunctionCallError::RespondToModel(format!(
                    "retry failed: {e}"
                ))),
            }
        }
        decision @ (ReviewDecision::Denied | ReviewDecision::Abort) => {
            otel_event_manager.tool_decision(
                tool_name,
                call_id.as_str(),
                decision,
                ToolDecisionSource::User,
            );
            // Fall through to original failure handling.
            Err(FunctionCallError::RespondToModel(
                "exec command rejected by user".to_string(),
            ))
        }
    }
 }
 fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
    let ExecToolCallOutput {
        aggregated_output, ..
@@ -3303,6 +3004,8 @@ pub(crate) async fn exit_review_mode(
        .await;
 }
 use crate::executor::errors::ExecError;
 use crate::executor::linkers::PreparedExec;
 #[cfg(test)]
 pub(crate) use tests::make_session_and_context;
@@ -3616,9 +3319,13 @@ mod tests {
            unified_exec_manager: UnifiedExecSessionManager::default(),
            notifier: UserNotifier::default(),
            rollout: Mutex::new(None),
            codex_linux_sandbox_exe: None,
            user_shell: shell::Shell::Unknown,
            show_raw_agent_reasoning: config.show_raw_agent_reasoning,
            executor: Executor::new(ExecutorConfig::new(
                turn_context.sandbox_policy.clone(),
                turn_context.cwd.clone(),
                None,
            )),
        };
        let session = Session {
            conversation_id,
@@ -3685,9 +3392,13 @@ mod tests {
            unified_exec_manager: UnifiedExecSessionManager::default(),
            notifier: UserNotifier::default(),
            rollout: Mutex::new(None),
            codex_linux_sandbox_exe: None,
            user_shell: shell::Shell::Unknown,
            show_raw_agent_reasoning: config.show_raw_agent_reasoning,
            executor: Executor::new(ExecutorConfig::new(
                config.sandbox_policy.clone(),
                config.cwd.clone(),
                None,
            )),
        };
        let session = Arc::new(Session {
            conversation_id,
--- a/codex-rs/core/src/executor/backends.rs
+++ b/codex-rs/core/src/executor/backends.rs
@@ -0,0 +1,101 @@
 use std::collections::HashMap;
 use std::env;
 use async_trait::async_trait;
 use crate::CODEX_APPLY_PATCH_ARG1;
 use crate::apply_patch::ApplyPatchExec;
 use crate::exec::ExecParams;
 use crate::function_tool::FunctionCallError;
 pub(crate) enum ExecutionMode {
    Shell,
    ApplyPatch(ApplyPatchExec),
 }
 #[async_trait]
 /// Backend-specific hooks that prepare and post-process execution requests for a
 /// given [`ExecutionMode`].
 pub(crate) trait ExecutionBackend: Send + Sync {
    fn prepare(
        &self,
        params: ExecParams,
        // Required for downcasting the apply_patch.
        mode: &ExecutionMode,
    ) -> Result<ExecParams, FunctionCallError>;
    fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
        true
    }
 }
 static SHELL_BACKEND: ShellBackend = ShellBackend;
 static APPLY_PATCH_BACKEND: ApplyPatchBackend = ApplyPatchBackend;
 pub(crate) fn backend_for_mode(mode: &ExecutionMode) -> &'static dyn ExecutionBackend {
    match mode {
        ExecutionMode::Shell => &SHELL_BACKEND,
        ExecutionMode::ApplyPatch(_) => &APPLY_PATCH_BACKEND,
    }
 }
 struct ShellBackend;
 #[async_trait]
 impl ExecutionBackend for ShellBackend {
    fn prepare(
        &self,
        params: ExecParams,
        mode: &ExecutionMode,
    ) -> Result<ExecParams, FunctionCallError> {
        match mode {
            ExecutionMode::Shell => Ok(params),
            _ => Err(FunctionCallError::RespondToModel(
                "shell backend invoked with non-shell mode".to_string(),
            )),
        }
    }
 }
 struct ApplyPatchBackend;
 #[async_trait]
 impl ExecutionBackend for ApplyPatchBackend {
    fn prepare(
        &self,
        params: ExecParams,
        mode: &ExecutionMode,
    ) -> Result<ExecParams, FunctionCallError> {
        match mode {
            ExecutionMode::ApplyPatch(exec) => {
                let path_to_codex = env::current_exe()
                    .ok()
                    .map(|p| p.to_string_lossy().to_string())
                    .ok_or_else(|| {
                        FunctionCallError::RespondToModel(
                            "failed to determine path to codex executable".to_string(),
                        )
                    })?;
                let patch = exec.action.patch.clone();
                Ok(ExecParams {
                    command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
                    cwd: exec.action.cwd.clone(),
                    timeout_ms: params.timeout_ms,
                    // Run apply_patch with a minimal environment for determinism and to
                    // avoid leaking host environment variables into the patch process.
                    env: HashMap::new(),
                    with_escalated_permissions: params.with_escalated_permissions,
                    justification: params.justification,
                })
            }
            ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
                "apply_patch backend invoked without patch context".to_string(),
            )),
        }
    }
    fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
        false
    }
 }
--- a/codex-rs/core/src/executor/cache.rs
+++ b/codex-rs/core/src/executor/cache.rs
@@ -0,0 +1,51 @@
 use std::collections::HashSet;
 use std::sync::Arc;
 use std::sync::Mutex;
 #[derive(Clone, Debug, Default)]
 /// Thread-safe store of user approvals so repeated commands can reuse
 /// previously granted trust.
 pub(crate) struct ApprovalCache {
    inner: Arc<Mutex<HashSet<Vec<String>>>>,
 }
 impl ApprovalCache {
    pub(crate) fn insert(&self, command: Vec<String>) {
        if command.is_empty() {
            return;
        }
        if let Ok(mut guard) = self.inner.lock() {
            guard.insert(command);
        }
    }
    pub(crate) fn snapshot(&self) -> HashSet<Vec<String>> {
        self.inner.lock().map(|g| g.clone()).unwrap_or_default()
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use pretty_assertions::assert_eq;
    #[test]
    fn insert_ignores_empty_and_dedupes() {
        let cache = ApprovalCache::default();
        // Empty should be ignored
        cache.insert(vec![]);
        assert!(cache.snapshot().is_empty());
        // Insert a command and verify snapshot contains it
        let cmd = vec!["foo".to_string(), "bar".to_string()];
        cache.insert(cmd.clone());
        let snap1 = cache.snapshot();
        assert!(snap1.contains(&cmd));
        // Reinserting should not create duplicates
        cache.insert(cmd);
        let snap2 = cache.snapshot();
        assert_eq!(snap1, snap2);
    }
 }
--- a/codex-rs/core/src/executor/mod.rs
+++ b/codex-rs/core/src/executor/mod.rs
@@ -0,0 +1,64 @@
 mod backends;
 mod cache;
 mod runner;
 mod sandbox;
 pub(crate) use backends::ExecutionMode;
 pub(crate) use runner::ExecutionRequest;
 pub(crate) use runner::Executor;
 pub(crate) use runner::ExecutorConfig;
 pub(crate) use runner::normalize_exec_result;
 pub(crate) mod linkers {
    use crate::codex::ExecCommandContext;
    use crate::exec::ExecParams;
    use crate::exec::StdoutStream;
    use crate::executor::backends::ExecutionMode;
    use crate::executor::runner::ExecutionRequest;
    pub struct PreparedExec {
        pub(crate) context: ExecCommandContext,
        pub(crate) request: ExecutionRequest,
    }
    impl PreparedExec {
        pub fn new(
            context: ExecCommandContext,
            params: ExecParams,
            approval_command: Vec<String>,
            mode: ExecutionMode,
            stdout_stream: Option<StdoutStream>,
            use_shell_profile: bool,
        ) -> Self {
            let request = ExecutionRequest {
                params,
                approval_command,
                mode,
                stdout_stream,
                use_shell_profile,
            };
            Self { context, request }
        }
    }
 }
 pub mod errors {
    use crate::error::CodexErr;
    use crate::function_tool::FunctionCallError;
    use thiserror::Error;
    #[derive(Debug, Error)]
    pub enum ExecError {
        #[error(transparent)]
        Function(#[from] FunctionCallError),
        #[error(transparent)]
        Codex(#[from] CodexErr),
    }
    impl ExecError {
        pub(crate) fn rejection(msg: impl Into<String>) -> Self {
            FunctionCallError::RespondToModel(msg.into()).into()
        }
    }
 }
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -0,0 +1,408 @@
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::sync::RwLock;
 use std::time::Duration;
 use super::backends::ExecutionMode;
 use super::backends::backend_for_mode;
 use super::cache::ApprovalCache;
 use crate::codex::ExecCommandContext;
 use crate::codex::Session;
 use crate::error::CodexErr;
 use crate::error::SandboxErr;
 use crate::error::get_error_message_ui;
 use crate::exec::ExecParams;
 use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::exec::StdoutStream;
 use crate::exec::StreamOutput;
 use crate::exec::process_exec_tool_call;
 use crate::executor::errors::ExecError;
 use crate::executor::sandbox::select_sandbox;
 use crate::function_tool::FunctionCallError;
 use crate::protocol::AskForApproval;
 use crate::protocol::ReviewDecision;
 use crate::protocol::SandboxPolicy;
 use crate::shell;
 use codex_otel::otel_event_manager::ToolDecisionSource;
 #[derive(Clone, Debug)]
 pub(crate) struct ExecutorConfig {
    pub(crate) sandbox_policy: SandboxPolicy,
    pub(crate) sandbox_cwd: PathBuf,
    codex_linux_sandbox_exe: Option<PathBuf>,
 }
 impl ExecutorConfig {
    pub(crate) fn new(
        sandbox_policy: SandboxPolicy,
        sandbox_cwd: PathBuf,
        codex_linux_sandbox_exe: Option<PathBuf>,
    ) -> Self {
        Self {
            sandbox_policy,
            sandbox_cwd,
            codex_linux_sandbox_exe,
        }
    }
 }
 /// Coordinates sandbox selection, backend-specific preparation, and command
 /// execution for tool calls requested by the model.
 pub(crate) struct Executor {
    approval_cache: ApprovalCache,
    config: Arc<RwLock<ExecutorConfig>>,
 }
 impl Executor {
    pub(crate) fn new(config: ExecutorConfig) -> Self {
        Self {
            approval_cache: ApprovalCache::default(),
            config: Arc::new(RwLock::new(config)),
        }
    }
    /// Updates the sandbox policy and working directory used for future
    /// executions without recreating the executor.
    pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) {
        if let Ok(mut cfg) = self.config.write() {
            cfg.sandbox_policy = sandbox_policy;
            cfg.sandbox_cwd = sandbox_cwd;
        }
    }
    /// Runs a prepared execution request end-to-end: prepares parameters, decides on
    /// sandbox placement (prompting the user when necessary), launches the command,
    /// and lets the backend post-process the final output.
    pub(crate) async fn run(
        &self,
        mut request: ExecutionRequest,
        session: &Session,
        approval_policy: AskForApproval,
        context: &ExecCommandContext,
    ) -> Result<ExecToolCallOutput, ExecError> {
        if matches!(request.mode, ExecutionMode::Shell) {
            request.params =
                maybe_translate_shell_command(request.params, session, request.use_shell_profile);
        }
        // Step 1: Normalise parameters via the selected backend.
        let backend = backend_for_mode(&request.mode);
        let stdout_stream = if backend.stream_stdout(&request.mode) {
            request.stdout_stream.clone()
        } else {
            None
        };
        request.params = backend
            .prepare(request.params, &request.mode)
            .map_err(ExecError::from)?;
        // Step 2: Snapshot sandbox configuration so it stays stable for this run.
        let config = self
            .config
            .read()
            .map_err(|_| ExecError::rejection("executor config poisoned"))?
            .clone();
        // Step 3: Decide sandbox placement, prompting for approval when needed.
        let sandbox_decision = select_sandbox(
            &request,
            approval_policy,
            self.approval_cache.snapshot(),
            &config,
            session,
            &context.sub_id,
            &context.call_id,
            &context.otel_event_manager,
        )
        .await?;
        if sandbox_decision.record_session_approval {
            self.approval_cache.insert(request.approval_command.clone());
        }
        // Step 4: Launch the command within the chosen sandbox.
        let first_attempt = self
            .spawn(
                request.params.clone(),
                sandbox_decision.initial_sandbox,
                &config,
                stdout_stream.clone(),
            )
            .await;
        // Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry.
        match first_attempt {
            Ok(output) => Ok(output),
            Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => {
                Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into())
            }
            Err(CodexErr::Sandbox(error)) => {
                if sandbox_decision.escalate_on_failure {
                    self.retry_without_sandbox(
                        &request,
                        &config,
                        session,
                        context,
                        stdout_stream,
                        error,
                    )
                    .await
                } else {
                    let message = sandbox_failure_message(error);
                    Err(ExecError::rejection(message))
                }
            }
            Err(err) => Err(err.into()),
        }
    }
    /// Fallback path invoked when a sandboxed run is denied so the user can
    /// approve rerunning without isolation.
    async fn retry_without_sandbox(
        &self,
        request: &ExecutionRequest,
        config: &ExecutorConfig,
        session: &Session,
        context: &ExecCommandContext,
        stdout_stream: Option<StdoutStream>,
        sandbox_error: SandboxErr,
    ) -> Result<ExecToolCallOutput, ExecError> {
        session
            .notify_background_event(
                &context.sub_id,
                format!("Execution failed: {sandbox_error}"),
            )
            .await;
        let decision = session
            .request_command_approval(
                context.sub_id.to_string(),
                context.call_id.to_string(),
                request.approval_command.clone(),
                request.params.cwd.clone(),
                Some("command failed; retry without sandbox?".to_string()),
            )
            .await;
        context.otel_event_manager.tool_decision(
            &context.tool_name,
            &context.call_id,
            decision,
            ToolDecisionSource::User,
        );
        match decision {
            ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
                if matches!(decision, ReviewDecision::ApprovedForSession) {
                    self.approval_cache.insert(request.approval_command.clone());
                }
                session
                    .notify_background_event(&context.sub_id, "retrying command without sandbox")
                    .await;
                let retry_output = self
                    .spawn(
                        request.params.clone(),
                        SandboxType::None,
                        config,
                        stdout_stream,
                    )
                    .await?;
                Ok(retry_output)
            }
            ReviewDecision::Denied | ReviewDecision::Abort => {
                Err(ExecError::rejection("exec command rejected by user"))
            }
        }
    }
    async fn spawn(
        &self,
        params: ExecParams,
        sandbox: SandboxType,
        config: &ExecutorConfig,
        stdout_stream: Option<StdoutStream>,
    ) -> Result<ExecToolCallOutput, CodexErr> {
        process_exec_tool_call(
            params,
            sandbox,
            &config.sandbox_policy,
            &config.sandbox_cwd,
            &config.codex_linux_sandbox_exe,
            stdout_stream,
        )
        .await
    }
 }
 fn maybe_translate_shell_command(
    params: ExecParams,
    session: &Session,
    use_shell_profile: bool,
 ) -> ExecParams {
    let should_translate =
        matches!(session.user_shell(), shell::Shell::PowerShell(_)) || use_shell_profile;
    if should_translate
        && let Some(command) = session
            .user_shell()
            .format_default_shell_invocation(params.command.clone())
    {
        return ExecParams { command, ..params };
    }
    params
 }
 fn sandbox_failure_message(error: SandboxErr) -> String {
    let codex_error = CodexErr::Sandbox(error);
    let friendly = get_error_message_ui(&codex_error);
    format!("failed in sandbox: {friendly}")
 }
 pub(crate) struct ExecutionRequest {
    pub params: ExecParams,
    pub approval_command: Vec<String>,
    pub mode: ExecutionMode,
    pub stdout_stream: Option<StdoutStream>,
    pub use_shell_profile: bool,
 }
 pub(crate) struct NormalizedExecOutput<'a> {
    borrowed: Option<&'a ExecToolCallOutput>,
    synthetic: Option<ExecToolCallOutput>,
 }
 impl<'a> NormalizedExecOutput<'a> {
    pub(crate) fn event_output(&'a self) -> &'a ExecToolCallOutput {
        match (self.borrowed, self.synthetic.as_ref()) {
            (Some(output), _) => output,
            (None, Some(output)) => output,
            (None, None) => unreachable!("normalized exec output missing data"),
        }
    }
 }
 /// Converts a raw execution result into a uniform view that always exposes an
 /// [`ExecToolCallOutput`], synthesizing error output when the command fails
 /// before producing a response.
 pub(crate) fn normalize_exec_result(
    result: &Result<ExecToolCallOutput, ExecError>,
 ) -> NormalizedExecOutput<'_> {
    match result {
        Ok(output) => NormalizedExecOutput {
            borrowed: Some(output),
            synthetic: None,
        },
        Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => {
            NormalizedExecOutput {
                borrowed: Some(output.as_ref()),
                synthetic: None,
            }
        }
        Err(err) => {
            let message = match err {
                ExecError::Function(FunctionCallError::RespondToModel(msg)) => msg.clone(),
                ExecError::Codex(e) => get_error_message_ui(e),
            };
            let synthetic = ExecToolCallOutput {
                exit_code: -1,
                stdout: StreamOutput::new(String::new()),
                stderr: StreamOutput::new(message.clone()),
                aggregated_output: StreamOutput::new(message),
                duration: Duration::default(),
                timed_out: false,
            };
            NormalizedExecOutput {
                borrowed: None,
                synthetic: Some(synthetic),
            }
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::error::CodexErr;
    use crate::error::EnvVarError;
    use crate::error::SandboxErr;
    use crate::exec::StreamOutput;
    use pretty_assertions::assert_eq;
    fn make_output(text: &str) -> ExecToolCallOutput {
        ExecToolCallOutput {
            exit_code: 1,
            stdout: StreamOutput::new(String::new()),
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new(text.to_string()),
            duration: Duration::from_millis(123),
            timed_out: false,
        }
    }
    #[test]
    fn normalize_success_borrows() {
        let out = make_output("ok");
        let result: Result<ExecToolCallOutput, ExecError> = Ok(out);
        let normalized = normalize_exec_result(&result);
        assert_eq!(normalized.event_output().aggregated_output.text, "ok");
    }
    #[test]
    fn normalize_timeout_borrows_embedded_output() {
        let out = make_output("timed out payload");
        let err = CodexErr::Sandbox(SandboxErr::Timeout {
            output: Box::new(out),
        });
        let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(err));
        let normalized = normalize_exec_result(&result);
        assert_eq!(
            normalized.event_output().aggregated_output.text,
            "timed out payload"
        );
    }
    #[test]
    fn sandbox_failure_message_uses_denied_stderr() {
        let output = ExecToolCallOutput {
            exit_code: 101,
            stdout: StreamOutput::new(String::new()),
            stderr: StreamOutput::new("sandbox stderr".to_string()),
            aggregated_output: StreamOutput::new(String::new()),
            duration: Duration::from_millis(10),
            timed_out: false,
        };
        let err = SandboxErr::Denied {
            output: Box::new(output),
        };
        let message = sandbox_failure_message(err);
        assert_eq!(message, "failed in sandbox: sandbox stderr");
    }
    #[test]
    fn normalize_function_error_synthesizes_payload() {
        let err = FunctionCallError::RespondToModel("boom".to_string());
        let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Function(err));
        let normalized = normalize_exec_result(&result);
        assert_eq!(normalized.event_output().aggregated_output.text, "boom");
    }
    #[test]
    fn normalize_codex_error_synthesizes_user_message() {
        // Use a simple EnvVar error which formats to a clear message
        let e = CodexErr::EnvVar(EnvVarError {
            var: "FOO".to_string(),
            instructions: Some("set it".to_string()),
        });
        let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(e));
        let normalized = normalize_exec_result(&result);
        assert!(
            normalized
                .event_output()
                .aggregated_output
                .text
                .contains("Missing environment variable: `FOO`"),
            "expected synthesized user-friendly message"
        );
    }
 }
--- a/codex-rs/core/src/executor/sandbox.rs
+++ b/codex-rs/core/src/executor/sandbox.rs
@@ -0,0 +1,405 @@
 use crate::apply_patch::ApplyPatchExec;
 use crate::codex::Session;
 use crate::exec::SandboxType;
 use crate::executor::ExecutionMode;
 use crate::executor::ExecutionRequest;
 use crate::executor::ExecutorConfig;
 use crate::executor::errors::ExecError;
 use crate::safety::SafetyCheck;
 use crate::safety::assess_command_safety;
 use crate::safety::assess_patch_safety;
 use codex_otel::otel_event_manager::OtelEventManager;
 use codex_otel::otel_event_manager::ToolDecisionSource;
 use codex_protocol::protocol::AskForApproval;
 use codex_protocol::protocol::ReviewDecision;
 use std::collections::HashSet;
 /// Sandbox placement options selected for an execution run, including whether
 /// to escalate after failures and whether approvals should persist.
 pub(crate) struct SandboxDecision {
    pub(crate) initial_sandbox: SandboxType,
    pub(crate) escalate_on_failure: bool,
    pub(crate) record_session_approval: bool,
 }
 impl SandboxDecision {
    fn auto(sandbox: SandboxType, escalate_on_failure: bool) -> Self {
        Self {
            initial_sandbox: sandbox,
            escalate_on_failure,
            record_session_approval: false,
        }
    }
    fn user_override(record_session_approval: bool) -> Self {
        Self {
            initial_sandbox: SandboxType::None,
            escalate_on_failure: false,
            record_session_approval,
        }
    }
 }
 fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
    matches!(
        (approval, sandbox),
        (
            AskForApproval::UnlessTrusted | AskForApproval::OnFailure,
            SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
        )
    )
 }
 /// Determines how a command should be sandboxed, prompting the user when
 /// policy requires explicit approval.
 #[allow(clippy::too_many_arguments)]
 pub async fn select_sandbox(
    request: &ExecutionRequest,
    approval_policy: AskForApproval,
    approval_cache: HashSet<Vec<String>>,
    config: &ExecutorConfig,
    session: &Session,
    sub_id: &str,
    call_id: &str,
    otel_event_manager: &OtelEventManager,
 ) -> Result<SandboxDecision, ExecError> {
    match &request.mode {
        ExecutionMode::Shell => {
            select_shell_sandbox(
                request,
                approval_policy,
                approval_cache,
                config,
                session,
                sub_id,
                call_id,
                otel_event_manager,
            )
            .await
        }
        ExecutionMode::ApplyPatch(exec) => {
            select_apply_patch_sandbox(exec, approval_policy, config)
        }
    }
 }
 #[allow(clippy::too_many_arguments)]
 async fn select_shell_sandbox(
    request: &ExecutionRequest,
    approval_policy: AskForApproval,
    approved_snapshot: HashSet<Vec<String>>,
    config: &ExecutorConfig,
    session: &Session,
    sub_id: &str,
    call_id: &str,
    otel_event_manager: &OtelEventManager,
 ) -> Result<SandboxDecision, ExecError> {
    let command_for_safety = if request.approval_command.is_empty() {
        request.params.command.clone()
    } else {
        request.approval_command.clone()
    };
    let safety = assess_command_safety(
        &command_for_safety,
        approval_policy,
        &config.sandbox_policy,
        &approved_snapshot,
        request.params.with_escalated_permissions.unwrap_or(false),
    );
    match safety {
        SafetyCheck::AutoApprove {
            sandbox_type,
            user_explicitly_approved,
        } => {
            let mut decision = SandboxDecision::auto(
                sandbox_type,
                should_escalate_on_failure(approval_policy, sandbox_type),
            );
            if user_explicitly_approved {
                decision.record_session_approval = true;
            }
            let (decision_for_event, source) = if user_explicitly_approved {
                (ReviewDecision::ApprovedForSession, ToolDecisionSource::User)
            } else {
                (ReviewDecision::Approved, ToolDecisionSource::Config)
            };
            otel_event_manager.tool_decision("local_shell", call_id, decision_for_event, source);
            Ok(decision)
        }
        SafetyCheck::AskUser => {
            let decision = session
                .request_command_approval(
                    sub_id.to_string(),
                    call_id.to_string(),
                    request.approval_command.clone(),
                    request.params.cwd.clone(),
                    request.params.justification.clone(),
                )
                .await;
            otel_event_manager.tool_decision(
                "local_shell",
                call_id,
                decision,
                ToolDecisionSource::User,
            );
            match decision {
                ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)),
                ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)),
                ReviewDecision::Denied | ReviewDecision::Abort => {
                    Err(ExecError::rejection("exec command rejected by user"))
                }
            }
        }
        SafetyCheck::Reject { reason } => Err(ExecError::rejection(format!(
            "exec command rejected: {reason}"
        ))),
    }
 }
 fn select_apply_patch_sandbox(
    exec: &ApplyPatchExec,
    approval_policy: AskForApproval,
    config: &ExecutorConfig,
 ) -> Result<SandboxDecision, ExecError> {
    if exec.user_explicitly_approved_this_action {
        return Ok(SandboxDecision::user_override(false));
    }
    match assess_patch_safety(
        &exec.action,
        approval_policy,
        &config.sandbox_policy,
        &config.sandbox_cwd,
    ) {
        SafetyCheck::AutoApprove { sandbox_type, .. } => Ok(SandboxDecision::auto(
            sandbox_type,
            should_escalate_on_failure(approval_policy, sandbox_type),
        )),
        SafetyCheck::AskUser => Err(ExecError::rejection(
            "patch requires approval but none was recorded",
        )),
        SafetyCheck::Reject { reason } => {
            Err(ExecError::rejection(format!("patch rejected: {reason}")))
        }
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::codex::make_session_and_context;
    use crate::exec::ExecParams;
    use crate::function_tool::FunctionCallError;
    use crate::protocol::SandboxPolicy;
    use codex_apply_patch::ApplyPatchAction;
    use pretty_assertions::assert_eq;
    #[tokio::test]
    async fn select_apply_patch_user_override_when_explicit() {
        let (session, ctx) = make_session_and_context();
        let tmp = tempfile::tempdir().expect("tmp");
        let p = tmp.path().join("a.txt");
        let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
        let exec = ApplyPatchExec {
            action,
            user_explicitly_approved_this_action: true,
        };
        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
        let request = ExecutionRequest {
            params: ExecParams {
                command: vec!["apply_patch".into()],
                cwd: std::env::temp_dir(),
                timeout_ms: None,
                env: std::collections::HashMap::new(),
                with_escalated_permissions: None,
                justification: None,
            },
            approval_command: vec!["apply_patch".into()],
            mode: ExecutionMode::ApplyPatch(exec),
            stdout_stream: None,
            use_shell_profile: false,
        };
        let otel_event_manager = ctx.client.get_otel_event_manager();
        let decision = select_sandbox(
            &request,
            AskForApproval::OnRequest,
            Default::default(),
            &cfg,
            &session,
            "sub",
            "call",
            &otel_event_manager,
        )
        .await
        .expect("ok");
        // Explicit user override runs without sandbox
        assert_eq!(decision.initial_sandbox, SandboxType::None);
        assert_eq!(decision.escalate_on_failure, false);
    }
    #[tokio::test]
    async fn select_apply_patch_autoapprove_in_danger() {
        let (session, ctx) = make_session_and_context();
        let tmp = tempfile::tempdir().expect("tmp");
        let p = tmp.path().join("a.txt");
        let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
        let exec = ApplyPatchExec {
            action,
            user_explicitly_approved_this_action: false,
        };
        let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
        let request = ExecutionRequest {
            params: ExecParams {
                command: vec!["apply_patch".into()],
                cwd: std::env::temp_dir(),
                timeout_ms: None,
                env: std::collections::HashMap::new(),
                with_escalated_permissions: None,
                justification: None,
            },
            approval_command: vec!["apply_patch".into()],
            mode: ExecutionMode::ApplyPatch(exec),
            stdout_stream: None,
            use_shell_profile: false,
        };
        let otel_event_manager = ctx.client.get_otel_event_manager();
        let decision = select_sandbox(
            &request,
            AskForApproval::OnRequest,
            Default::default(),
            &cfg,
            &session,
            "sub",
            "call",
            &otel_event_manager,
        )
        .await
        .expect("ok");
        // On platforms with a sandbox, DangerFullAccess still prefers it
        let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None);
        assert_eq!(decision.initial_sandbox, expected);
        assert_eq!(decision.escalate_on_failure, false);
    }
    #[tokio::test]
    async fn select_apply_patch_requires_approval_on_unless_trusted() {
        let (session, ctx) = make_session_and_context();
        let tempdir = tempfile::tempdir().expect("tmpdir");
        let p = tempdir.path().join("a.txt");
        let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
        let exec = ApplyPatchExec {
            action,
            user_explicitly_approved_this_action: false,
        };
        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
        let request = ExecutionRequest {
            params: ExecParams {
                command: vec!["apply_patch".into()],
                cwd: std::env::temp_dir(),
                timeout_ms: None,
                env: std::collections::HashMap::new(),
                with_escalated_permissions: None,
                justification: None,
            },
            approval_command: vec!["apply_patch".into()],
            mode: ExecutionMode::ApplyPatch(exec),
            stdout_stream: None,
            use_shell_profile: false,
        };
        let otel_event_manager = ctx.client.get_otel_event_manager();
        let result = select_sandbox(
            &request,
            AskForApproval::UnlessTrusted,
            Default::default(),
            &cfg,
            &session,
            "sub",
            "call",
            &otel_event_manager,
        )
        .await;
        match result {
            Ok(_) => panic!("expected error"),
            Err(ExecError::Function(FunctionCallError::RespondToModel(msg))) => {
                assert!(msg.contains("requires approval"))
            }
            Err(other) => panic!("unexpected error: {other:?}"),
        }
    }
    #[tokio::test]
    async fn select_shell_autoapprove_in_danger_mode() {
        let (session, ctx) = make_session_and_context();
        let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
        let request = ExecutionRequest {
            params: ExecParams {
                command: vec!["some-unknown".into()],
                cwd: std::env::temp_dir(),
                timeout_ms: None,
                env: std::collections::HashMap::new(),
                with_escalated_permissions: None,
                justification: None,
            },
            approval_command: vec!["some-unknown".into()],
            mode: ExecutionMode::Shell,
            stdout_stream: None,
            use_shell_profile: false,
        };
        let otel_event_manager = ctx.client.get_otel_event_manager();
        let decision = select_sandbox(
            &request,
            AskForApproval::OnRequest,
            Default::default(),
            &cfg,
            &session,
            "sub",
            "call",
            &otel_event_manager,
        )
        .await
        .expect("ok");
        assert_eq!(decision.initial_sandbox, SandboxType::None);
        assert_eq!(decision.escalate_on_failure, false);
    }
    #[cfg(any(target_os = "macos", target_os = "linux"))]
    #[tokio::test]
    async fn select_shell_escalates_on_failure_with_platform_sandbox() {
        let (session, ctx) = make_session_and_context();
        let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
        let request = ExecutionRequest {
            params: ExecParams {
                // Unknown command => untrusted but not flagged dangerous
                command: vec!["some-unknown".into()],
                cwd: std::env::temp_dir(),
                timeout_ms: None,
                env: std::collections::HashMap::new(),
                with_escalated_permissions: None,
                justification: None,
            },
            approval_command: vec!["some-unknown".into()],
            mode: ExecutionMode::Shell,
            stdout_stream: None,
            use_shell_profile: false,
        };
        let otel_event_manager = ctx.client.get_otel_event_manager();
        let decision = select_sandbox(
            &request,
            AskForApproval::OnFailure,
            Default::default(),
            &cfg,
            &session,
            "sub",
            "call",
            &otel_event_manager,
        )
        .await
        .expect("ok");
        // On macOS/Linux we should have a platform sandbox and escalate on failure
        assert_ne!(decision.initial_sandbox, SandboxType::None);
        assert_eq!(decision.escalate_on_failure, true);
    }
 }
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -27,6 +27,7 @@ pub mod error;
 pub mod exec;
 mod exec_command;
 pub mod exec_env;
 pub mod executor;
 mod flags;
 pub mod git_info;
 pub mod landlock;
--- a/codex-rs/core/src/safety.rs
+++ b/codex-rs/core/src/safety.rs
@@ -125,9 +125,10 @@ pub fn assess_command_safety(
    // the session _because_ they know it needs to run outside a sandbox.
    if is_known_safe_command(command) || approved.contains(command) {
        let user_explicitly_approved = approved.contains(command);
        return SafetyCheck::AutoApprove {
            sandbox_type: SandboxType::None,
-            user_explicitly_approved: false,
+            user_explicitly_approved,
        };
    }
@@ -380,7 +381,7 @@ mod tests {
            safety_check,
            SafetyCheck::AutoApprove {
                sandbox_type: SandboxType::None,
-                user_explicitly_approved: false,
+                user_explicitly_approved: true,
            }
        );
    }
--- a/codex-rs/core/src/state/service.rs
+++ b/codex-rs/core/src/state/service.rs
@@ -1,9 +1,9 @@
 use crate::RolloutRecorder;
 use crate::exec_command::ExecSessionManager;
 use crate::executor::Executor;
 use crate::mcp_connection_manager::McpConnectionManager;
 use crate::unified_exec::UnifiedExecSessionManager;
 use crate::user_notification::UserNotifier;
 use std::path::PathBuf;
 use tokio::sync::Mutex;
 pub(crate) struct SessionServices {
@@ -12,7 +12,7 @@ pub(crate) struct SessionServices {
    pub(crate) unified_exec_manager: UnifiedExecSessionManager,
    pub(crate) notifier: UserNotifier,
    pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
    pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
    pub(crate) user_shell: crate::shell::Shell,
    pub(crate) show_raw_agent_reasoning: bool,
    pub(crate) executor: Executor,
 }
--- a/codex-rs/core/src/state/session.rs
+++ b/codex-rs/core/src/state/session.rs
@@ -1,7 +1,5 @@
 //! Session-wide mutable state.
 use std::collections::HashSet;
 use codex_protocol::models::ResponseItem;
 use crate::conversation_history::ConversationHistory;
@@ -12,7 +10,6 @@ use crate::protocol::TokenUsageInfo;
 /// Persistent, session-scoped state previously stored directly on `Session`.
 #[derive(Default)]
 pub(crate) struct SessionState {
    pub(crate) approved_commands: HashSet<Vec<String>>,
    pub(crate) history: ConversationHistory,
    pub(crate) token_info: Option<TokenUsageInfo>,
    pub(crate) latest_rate_limits: Option<RateLimitSnapshot>,
@@ -44,15 +41,6 @@ impl SessionState {
        self.history.replace(items);
    }
    // Approved command helpers
    pub(crate) fn add_approved_command(&mut self, cmd: Vec<String>) {
        self.approved_commands.insert(cmd);
    }
    pub(crate) fn approved_commands_ref(&self) -> &HashSet<Vec<String>> {
        &self.approved_commands
    }
    // Token/rate limit helpers
    pub(crate) fn update_token_info_from_usage(
        &mut self,
--- a/codex-rs/core/tests/suite/seatbelt.rs
+++ b/codex-rs/core/tests/suite/seatbelt.rs
@@ -169,6 +169,12 @@ async fn python_getpwuid_works_under_seatbelt() {
        return;
    }
    // For local dev.
    if which::which("python3").is_err() {
        eprintln!("python3 not found in PATH, skipping test.");
        return;
    }
    // ReadOnly is sufficient here since we are only exercising user lookup.
    let policy = SandboxPolicy::ReadOnly;
    let command_cwd = std::env::current_dir().expect("getcwd");