chore: rework tools execution workflow (#5278)
Rework the tool execution flow. Read `orchestrator.rs` to understand the new structure.
@@ -11,7 +11,7 @@ use crate::error::Result;
use crate::error::RetryLimitReachedError;
use crate::error::UnexpectedResponseError;
use crate::model_family::ModelFamily;
use crate::openai_tools::create_tools_json_for_chat_completions_api;
use crate::tools::spec::create_tools_json_for_chat_completions_api;
use crate::util::backoff;
use bytes::Bytes;
use codex_otel::otel_event_manager::OtelEventManager;

@@ -45,12 +45,12 @@ use crate::model_family::ModelFamily;
use crate::model_provider_info::ModelProviderInfo;
use crate::model_provider_info::WireApi;
use crate::openai_model_info::get_model_info;
use crate::openai_tools::create_tools_json_for_responses_api;
use crate::protocol::RateLimitSnapshot;
use crate::protocol::RateLimitWindow;
use crate::protocol::TokenUsage;
use crate::state::TaskKind;
use crate::token_data::PlanType;
use crate::tools::spec::create_tools_json_for_responses_api;
use crate::util::backoff;
use chrono::DateTime;
use chrono::Utc;

@@ -281,7 +281,7 @@ pub(crate) struct ResponsesApiRequest<'a> {
}

pub(crate) mod tools {
use crate::openai_tools::JsonSchema;
use crate::tools::spec::JsonSchema;
use serde::Deserialize;
use serde::Serialize;

@@ -8,6 +8,7 @@ use crate::AuthManager;
use crate::client_common::REVIEW_PROMPT;
use crate::event_mapping::map_response_item_to_event_messages;
use crate::function_tool::FunctionCallError;
use crate::parse_command::parse_command;
use crate::review_format::format_review_findings_block;
use crate::terminal;
use crate::user_notification::UserNotifier;
@@ -56,22 +57,14 @@ use crate::conversation_history::ConversationHistory;
use crate::environment_context::EnvironmentContext;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
use crate::exec::ExecToolCallOutput;
#[cfg(test)]
use crate::exec::StreamOutput;
use crate::exec_command::ExecCommandParams;
use crate::exec_command::ExecSessionManager;
use crate::exec_command::WriteStdinParams;
use crate::executor::Executor;
use crate::executor::ExecutorConfig;
use crate::executor::normalize_exec_result;
// Removed: legacy executor wiring replaced by ToolOrchestrator flows.
// legacy normalize_exec_result no longer used after orchestrator migration
use crate::mcp::auth::compute_auth_statuses;
use crate::mcp_connection_manager::McpConnectionManager;
use crate::model_family::find_family_for_model;
use crate::openai_model_info::get_model_info;
use crate::openai_tools::ToolsConfig;
use crate::openai_tools::ToolsConfigParams;
use crate::parse_command::parse_command;
use crate::project_doc::get_user_instructions;
use crate::protocol::AgentMessageDeltaEvent;
use crate::protocol::AgentReasoningDeltaEvent;
@@ -84,13 +77,9 @@ use crate::protocol::ErrorEvent;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecApprovalRequestEvent;
|
||||
use crate::protocol::ExecCommandBeginEvent;
|
||||
use crate::protocol::ExecCommandEndEvent;
|
||||
use crate::protocol::InputItem;
|
||||
use crate::protocol::ListCustomPromptsResponseEvent;
|
||||
use crate::protocol::Op;
|
||||
use crate::protocol::PatchApplyBeginEvent;
|
||||
use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::RateLimitSnapshot;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::ReviewOutputEvent;
|
||||
@@ -114,8 +103,10 @@ use crate::tasks::RegularTask;
|
||||
use crate::tasks::ReviewTask;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::format_exec_output_str;
|
||||
use crate::tools::parallel::ToolCallRuntime;
|
||||
use crate::tools::sandboxing::ApprovalStore;
|
||||
use crate::tools::spec::ToolsConfig;
|
||||
use crate::tools::spec::ToolsConfigParams;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use crate::user_instructions::UserInstructions;
|
||||
@@ -272,6 +263,7 @@ pub(crate) struct TurnContext {
|
||||
pub(crate) tools_config: ToolsConfig,
|
||||
pub(crate) is_review_mode: bool,
|
||||
pub(crate) final_output_json_schema: Option<Value>,
|
||||
pub(crate) codex_linux_sandbox_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl TurnContext {
|
||||
@@ -404,6 +396,7 @@ impl Session {
|
||||
tools_config,
|
||||
is_review_mode: false,
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: config.codex_linux_sandbox_exe.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -561,19 +554,14 @@ impl Session {
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager,
|
||||
session_manager: ExecSessionManager::default(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(config.notify.clone()),
|
||||
rollout: Mutex::new(Some(rollout_recorder)),
|
||||
user_shell: default_shell,
|
||||
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
|
||||
executor: Executor::new(ExecutorConfig::new(
|
||||
session_configuration.sandbox_policy.clone(),
|
||||
session_configuration.cwd.clone(),
|
||||
config.codex_linux_sandbox_exe.clone(),
|
||||
)),
|
||||
auth_manager: Arc::clone(&auth_manager),
|
||||
otel_event_manager,
|
||||
tool_approvals: Mutex::new(ApprovalStore::default()),
|
||||
};
|
||||
|
||||
let sess = Arc::new(Session {
|
||||
@@ -655,13 +643,12 @@ impl Session {
|
||||
}
|
||||
|
||||
pub(crate) async fn new_turn(&self, updates: SessionSettingsUpdate) -> Arc<TurnContext> {
|
||||
let current_configuration = self.state.lock().await.session_configuration.clone();
|
||||
let session_configuration = current_configuration.apply(&updates);
|
||||
|
||||
self.services.executor.update_environment(
|
||||
session_configuration.sandbox_policy.clone(),
|
||||
session_configuration.cwd.clone(),
|
||||
);
|
||||
let session_configuration = {
|
||||
let mut state = self.state.lock().await;
|
||||
let session_configuration = state.session_configuration.clone().apply(&updates);
|
||||
state.session_configuration = session_configuration.clone();
|
||||
session_configuration
|
||||
};
|
||||
|
||||
let mut turn_context: TurnContext = Self::make_turn_context(
|
||||
Some(Arc::clone(&self.services.auth_manager)),
|
||||
@@ -965,168 +952,6 @@ impl Session {
|
||||
}
|
||||
}
|
||||
|
||||
async fn on_exec_command_begin(
|
||||
&self,
|
||||
turn_diff_tracker: SharedTurnDiffTracker,
|
||||
exec_command_context: ExecCommandContext,
|
||||
) {
|
||||
let ExecCommandContext {
|
||||
sub_id,
|
||||
call_id,
|
||||
command_for_display,
|
||||
cwd,
|
||||
apply_patch,
|
||||
..
|
||||
} = exec_command_context;
|
||||
let msg = match apply_patch {
|
||||
Some(ApplyPatchCommandContext {
|
||||
user_explicitly_approved_this_action,
|
||||
changes,
|
||||
}) => {
|
||||
{
|
||||
let mut tracker = turn_diff_tracker.lock().await;
|
||||
tracker.on_patch_begin(&changes);
|
||||
}
|
||||
|
||||
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
||||
call_id,
|
||||
auto_approved: !user_explicitly_approved_this_action,
|
||||
changes,
|
||||
})
|
||||
}
|
||||
None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id,
|
||||
command: command_for_display.clone(),
|
||||
cwd,
|
||||
parsed_cmd: parse_command(&command_for_display),
|
||||
}),
|
||||
};
|
||||
let event = Event {
|
||||
id: sub_id.to_string(),
|
||||
msg,
|
||||
};
|
||||
self.send_event(event).await;
|
||||
}
|
||||
|
||||
async fn on_exec_command_end(
|
||||
&self,
|
||||
turn_diff_tracker: SharedTurnDiffTracker,
|
||||
sub_id: &str,
|
||||
call_id: &str,
|
||||
output: &ExecToolCallOutput,
|
||||
is_apply_patch: bool,
|
||||
) {
|
||||
let ExecToolCallOutput {
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output,
|
||||
duration,
|
||||
exit_code,
|
||||
timed_out: _,
|
||||
..
|
||||
} = output;
|
||||
// Send full stdout/stderr to clients; do not truncate.
|
||||
let stdout = stdout.text.clone();
|
||||
let stderr = stderr.text.clone();
|
||||
let formatted_output = format_exec_output_str(output);
|
||||
let aggregated_output: String = aggregated_output.text.clone();
|
||||
|
||||
let msg = if is_apply_patch {
|
||||
EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||
call_id: call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
success: *exit_code == 0,
|
||||
})
|
||||
} else {
|
||||
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output,
|
||||
exit_code: *exit_code,
|
||||
duration: *duration,
|
||||
formatted_output,
|
||||
})
|
||||
};
|
||||
|
||||
let event = Event {
|
||||
id: sub_id.to_string(),
|
||||
msg,
|
||||
};
|
||||
self.send_event(event).await;
|
||||
|
||||
// If this is an apply_patch, after we emit the end patch, emit a second event
|
||||
// with the full turn diff if there is one.
|
||||
if is_apply_patch {
|
||||
let unified_diff = {
|
||||
let mut tracker = turn_diff_tracker.lock().await;
|
||||
tracker.get_unified_diff()
|
||||
};
|
||||
if let Ok(Some(unified_diff)) = unified_diff {
|
||||
let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
|
||||
let event = Event {
|
||||
id: sub_id.into(),
|
||||
msg,
|
||||
};
|
||||
self.send_event(event).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Runs the exec tool call and emits events for the begin and end of the
|
||||
/// command even on error.
|
||||
///
|
||||
/// Returns the output of the exec tool call.
|
||||
pub(crate) async fn run_exec_with_events(
|
||||
&self,
|
||||
turn_diff_tracker: SharedTurnDiffTracker,
|
||||
prepared: PreparedExec,
|
||||
approval_policy: AskForApproval,
|
||||
) -> Result<ExecToolCallOutput, ExecError> {
|
||||
let PreparedExec { context, request } = prepared;
|
||||
let is_apply_patch = context.apply_patch.is_some();
|
||||
let sub_id = context.sub_id.clone();
|
||||
let call_id = context.call_id.clone();
|
||||
|
||||
let begin_turn_diff = turn_diff_tracker.clone();
|
||||
let begin_context = context.clone();
|
||||
let session = self;
|
||||
let result = self
|
||||
.services
|
||||
.executor
|
||||
.run(request, self, approval_policy, &context, move || {
|
||||
let turn_diff = begin_turn_diff.clone();
|
||||
let ctx = begin_context.clone();
|
||||
async move {
|
||||
session.on_exec_command_begin(turn_diff, ctx).await;
|
||||
}
|
||||
})
|
||||
.await;
|
||||
|
||||
if matches!(
|
||||
&result,
|
||||
Err(ExecError::Function(FunctionCallError::Denied(_)))
|
||||
) {
|
||||
return result;
|
||||
}
|
||||
|
||||
let normalized = normalize_exec_result(&result);
|
||||
let borrowed = normalized.event_output();
|
||||
|
||||
self.on_exec_command_end(
|
||||
turn_diff_tracker,
|
||||
&sub_id,
|
||||
&call_id,
|
||||
borrowed,
|
||||
is_apply_patch,
|
||||
)
|
||||
.await;
|
||||
|
||||
drop(normalized);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Helper that emits a BackgroundEvent with the given message. This keeps
|
||||
/// the call‑sites terse so adding more diagnostics does not clutter the
|
||||
/// core agent logic.
|
||||
@@ -1235,43 +1060,6 @@ impl Session {
|
||||
.parse_tool_name(tool_name)
|
||||
}
|
||||
|
||||
pub(crate) async fn handle_exec_command_tool(
|
||||
&self,
|
||||
params: ExecCommandParams,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let result = self
|
||||
.services
|
||||
.session_manager
|
||||
.handle_exec_command_request(params)
|
||||
.await;
|
||||
match result {
|
||||
Ok(output) => Ok(output.to_text_output()),
|
||||
Err(err) => Err(FunctionCallError::RespondToModel(err)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn handle_write_stdin_tool(
|
||||
&self,
|
||||
params: WriteStdinParams,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
self.services
|
||||
.session_manager
|
||||
.handle_write_stdin_request(params)
|
||||
.await
|
||||
.map(|output| output.to_text_output())
|
||||
.map_err(FunctionCallError::RespondToModel)
|
||||
}
|
||||
|
||||
pub(crate) async fn run_unified_exec_request(
|
||||
&self,
|
||||
request: crate::unified_exec::UnifiedExecRequest<'_>,
|
||||
) -> Result<crate::unified_exec::UnifiedExecResult, crate::unified_exec::UnifiedExecError> {
|
||||
self.services
|
||||
.unified_exec_manager
|
||||
.handle_request(request)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn interrupt_task(self: &Arc<Self>) {
|
||||
info!("interrupt received: abort current task, if any");
|
||||
self.abort_all_tasks(TurnAbortReason::Interrupted).await;
|
||||
@@ -1633,6 +1421,7 @@ async fn spawn_review_thread(
|
||||
cwd: parent_turn_context.cwd.clone(),
|
||||
is_review_mode: true,
|
||||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
||||
};
|
||||
|
||||
// Seed the child task with the review prompt as the initial user message.
|
||||
@@ -2511,10 +2300,6 @@ fn is_mcp_client_auth_required_error(error: &anyhow::Error) -> bool {
|
||||
error.to_string().contains("Auth required")
|
||||
}
|
||||
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::executor::linkers::PreparedExec;
|
||||
use crate::tools::context::ApplyPatchCommandContext;
|
||||
use crate::tools::context::ExecCommandContext;
|
||||
#[cfg(test)]
|
||||
pub(crate) use tests::make_session_and_context;
|
||||
|
||||
@@ -2523,6 +2308,8 @@ mod tests {
|
||||
use super::*;
|
||||
use crate::config::ConfigOverrides;
|
||||
use crate::config::ConfigToml;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::tools::format_exec_output_str;
|
||||
|
||||
use crate::protocol::CompactedItem;
|
||||
use crate::protocol::InitialHistory;
|
||||
@@ -2827,19 +2614,14 @@ mod tests {
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager: McpConnectionManager::default(),
|
||||
session_manager: ExecSessionManager::default(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(None),
|
||||
rollout: Mutex::new(None),
|
||||
user_shell: shell::Shell::Unknown,
|
||||
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
|
||||
executor: Executor::new(ExecutorConfig::new(
|
||||
session_configuration.sandbox_policy.clone(),
|
||||
session_configuration.cwd.clone(),
|
||||
None,
|
||||
)),
|
||||
auth_manager: Arc::clone(&auth_manager),
|
||||
otel_event_manager: otel_event_manager.clone(),
|
||||
tool_approvals: Mutex::new(ApprovalStore::default()),
|
||||
};
|
||||
|
||||
let session = Session {
|
||||
@@ -2898,19 +2680,14 @@ mod tests {
|
||||
|
||||
let services = SessionServices {
|
||||
mcp_connection_manager: McpConnectionManager::default(),
|
||||
session_manager: ExecSessionManager::default(),
|
||||
unified_exec_manager: UnifiedExecSessionManager::default(),
|
||||
notifier: UserNotifier::new(None),
|
||||
rollout: Mutex::new(None),
|
||||
user_shell: shell::Shell::Unknown,
|
||||
show_raw_agent_reasoning: config.show_raw_agent_reasoning,
|
||||
executor: Executor::new(ExecutorConfig::new(
|
||||
session_configuration.sandbox_policy.clone(),
|
||||
session_configuration.cwd.clone(),
|
||||
None,
|
||||
)),
|
||||
auth_manager: Arc::clone(&auth_manager),
|
||||
otel_event_manager: otel_event_manager.clone(),
|
||||
tool_approvals: Mutex::new(ApprovalStore::default()),
|
||||
};
|
||||
|
||||
let session = Arc::new(Session {
|
||||
@@ -3258,6 +3035,7 @@ mod tests {
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: Some(true),
|
||||
justification: Some("test".to_string()),
|
||||
arg0: None,
|
||||
};
|
||||
|
||||
let params2 = ExecParams {
|
||||
|
||||
@@ -1,15 +1,25 @@
use crate::bash::parse_bash_lc_plain_commands;

pub fn is_known_safe_command(command: &[String]) -> bool {
let command: Vec<String> = command
.iter()
.map(|s| {
if s == "zsh" {
"bash".to_string()
} else {
s.clone()
}
})
.collect();
#[cfg(target_os = "windows")]
{
use super::windows_safe_commands::is_safe_command_windows;
if is_safe_command_windows(command) {
if is_safe_command_windows(&command) {
return true;
}
}

if is_safe_to_call_with_exec(command) {
if is_safe_to_call_with_exec(&command) {
return true;
}

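Net effect of the normalization above: `zsh` is rewritten to `bash` before any check runs, so a `zsh -lc` script gets the same allow-list treatment as `bash -lc`. A hypothetical test-style sketch that could sit next to this function (the command strings are invented; `cat` is taken from the allow-list shown further down in this file):

```rust
#[cfg(test)]
mod zsh_normalization_sketch {
    use super::is_known_safe_command;

    #[test]
    fn zsh_lc_is_treated_like_bash_lc() {
        // Rewritten to `bash -lc "cat Cargo.toml"` before the safety checks, so
        // the plain `cat` command inside the script is what actually gets vetted.
        let zsh = ["zsh", "-lc", "cat Cargo.toml"].map(String::from);
        assert!(is_known_safe_command(&zsh));

        // A bare allow-listed binary is accepted by `is_safe_to_call_with_exec` directly.
        let cat = ["cat", "Cargo.toml"].map(String::from);
        assert!(is_known_safe_command(&cat));
    }
}
```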
@@ -19,7 +29,7 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
// introduce side effects ( "&&", "||", ";", and "|" ). If every
// individual command in the script is itself a known‑safe command, then
// the composite expression is considered safe.
if let Some(all_commands) = parse_bash_lc_plain_commands(command)
if let Some(all_commands) = parse_bash_lc_plain_commands(&command)
&& !all_commands.is_empty()
&& all_commands
.iter()
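As the comment above says, a `bash -lc` script is only considered safe when every plain command joined by `&&`, `||`, `;`, or `|` is itself known-safe. A hypothetical continuation of the sketch above, assuming `cargo build` is not accepted by any arm of the elided allow-list (only `cargo check` is shown as allowed below):

```rust
#[test]
fn every_command_in_the_script_must_be_allow_listed() {
    // `cat` and `cargo check` are both accepted, so the chained script passes.
    let chained = ["bash", "-lc", "cat Cargo.toml && cargo check"].map(String::from);
    assert!(is_known_safe_command(&chained));

    // `cargo build` fails the `cargo check` guard, which poisons the whole script.
    let tainted = ["bash", "-lc", "cat Cargo.toml && cargo build"].map(String::from);
    assert!(!is_known_safe_command(&tainted));
}
```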
@@ -31,9 +41,14 @@ pub fn is_known_safe_command(command: &[String]) -> bool {
}

fn is_safe_to_call_with_exec(command: &[String]) -> bool {
let cmd0 = command.first().map(String::as_str);
let Some(cmd0) = command.first().map(String::as_str) else {
return false;
};

match cmd0 {
match std::path::Path::new(&cmd0)
.file_name()
.and_then(|osstr| osstr.to_str())
{
#[rustfmt::skip]
Some(
"cat" |
@@ -103,13 +118,12 @@ fn is_safe_to_call_with_exec(command: &[String]) -> bool {
// Rust
Some("cargo") if command.get(1).map(String::as_str) == Some("check") => true,

// Special-case `sed -n {N|M,N}p FILE`
// Special-case `sed -n {N|M,N}p`
Some("sed")
if {
command.len() == 4
command.len() <= 4
&& command.get(1).map(String::as_str) == Some("-n")
&& is_valid_sed_n_arg(command.get(2).map(String::as_str))
&& command.get(3).map(String::is_empty) == Some(false)
} =>
{
true

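Judging by the hunk counts here (13 lines before, 12 after), the `command.get(3)` non-empty check is the line dropped on the new side, so together with `command.len() <= 4` the trailing file argument becomes optional: `sed -n 5p` reading stdin now passes the shape check just like `sed -n 1,20p FILE`. A hypothetical sketch, assuming `is_valid_sed_n_arg` accepts the usual `N` / `M,N` forms with a trailing `p`:

```rust
#[test]
fn sed_n_print_no_longer_requires_a_file_argument() {
    // Four elements: accepted both before and after this change.
    let with_file = ["sed", "-n", "1,20p", "Cargo.toml"].map(String::from);
    assert!(is_known_safe_command(&with_file));

    // Three elements (stdin): newly accepted thanks to `command.len() <= 4`.
    let stdin_only = ["sed", "-n", "5p"].map(String::from);
    assert!(is_known_safe_command(&stdin_only));

    // In-place editing still fails the `-n` + range-shape guard.
    let in_place = ["sed", "-i", "s/foo/bar/", "Cargo.toml"].map(String::from);
    assert!(!is_known_safe_command(&in_place));
}
```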
@@ -18,13 +18,14 @@ use tokio::process::Child;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::Result;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::landlock::spawn_command_under_linux_sandbox;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandOutputDeltaEvent;
|
||||
use crate::protocol::ExecOutputStream;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::seatbelt::spawn_command_under_seatbelt;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::ExecEnv;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::spawn::StdioPolicy;
|
||||
use crate::spawn::spawn_child_async;
|
||||
|
||||
@@ -53,6 +54,7 @@ pub struct ExecParams {
|
||||
pub env: HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub arg0: Option<String>,
|
||||
}
|
||||
|
||||
impl ExecParams {
|
||||
@@ -87,57 +89,85 @@ pub async fn process_exec_tool_call(
|
||||
codex_linux_sandbox_exe: &Option<PathBuf>,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> Result<ExecToolCallOutput> {
|
||||
let start = Instant::now();
|
||||
let ExecParams {
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0: _,
|
||||
} = params;
|
||||
|
||||
let timeout_duration = params.timeout_duration();
|
||||
let (program, args) = command.split_first().ok_or_else(|| {
|
||||
CodexErr::Io(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"command args are empty",
|
||||
))
|
||||
})?;
|
||||
|
||||
let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type
|
||||
{
|
||||
SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await,
|
||||
SandboxType::MacosSeatbelt => {
|
||||
let ExecParams {
|
||||
command,
|
||||
cwd: command_cwd,
|
||||
env,
|
||||
..
|
||||
} = params;
|
||||
let child = spawn_command_under_seatbelt(
|
||||
command,
|
||||
command_cwd,
|
||||
sandbox_policy,
|
||||
sandbox_cwd,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await
|
||||
}
|
||||
SandboxType::LinuxSeccomp => {
|
||||
let ExecParams {
|
||||
command,
|
||||
cwd: command_cwd,
|
||||
env,
|
||||
..
|
||||
} = params;
|
||||
|
||||
let codex_linux_sandbox_exe = codex_linux_sandbox_exe
|
||||
.as_ref()
|
||||
.ok_or(CodexErr::LandlockSandboxExecutableNotProvided)?;
|
||||
let child = spawn_command_under_linux_sandbox(
|
||||
codex_linux_sandbox_exe,
|
||||
command,
|
||||
command_cwd,
|
||||
sandbox_policy,
|
||||
sandbox_cwd,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
env,
|
||||
)
|
||||
.await?;
|
||||
|
||||
consume_truncated_output(child, timeout_duration, stdout_stream).await
|
||||
}
|
||||
let spec = CommandSpec {
program: program.clone(),
args: args.to_vec(),
cwd,
env,
timeout_ms,
with_escalated_permissions,
justification,
};

let manager = SandboxManager::new();
let exec_env = manager
.transform(
&spec,
sandbox_policy,
sandbox_type,
sandbox_cwd,
codex_linux_sandbox_exe.as_ref(),
)
.map_err(CodexErr::from)?;

// Route through the sandboxing module for a single, unified execution path.
crate::sandboxing::execute_env(&exec_env, sandbox_policy, stdout_stream).await
}

pub(crate) async fn execute_exec_env(
|
||||
env: ExecEnv,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> Result<ExecToolCallOutput> {
|
||||
let ExecEnv {
|
||||
command,
|
||||
cwd,
|
||||
env,
|
||||
timeout_ms,
|
||||
sandbox,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0,
|
||||
} = env;
|
||||
|
||||
let params = ExecParams {
|
||||
command,
|
||||
cwd,
|
||||
timeout_ms,
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
arg0,
|
||||
};
|
||||
|
||||
let start = Instant::now();
|
||||
let raw_output_result = exec(params, sandbox_policy, stdout_stream).await;
|
||||
let duration = start.elapsed();
|
||||
finalize_exec_result(raw_output_result, sandbox, duration)
|
||||
}
|
||||
|
||||
fn finalize_exec_result(
|
||||
raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr>,
|
||||
sandbox_type: SandboxType,
|
||||
duration: Duration,
|
||||
) -> Result<ExecToolCallOutput> {
|
||||
match raw_output_result {
|
||||
Ok(raw_output) => {
|
||||
#[allow(unused_mut)]
|
||||
@@ -192,12 +222,30 @@ pub async fn process_exec_tool_call(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) mod errors {
|
||||
use super::CodexErr;
|
||||
use crate::sandboxing::SandboxTransformError;
|
||||
|
||||
impl From<SandboxTransformError> for CodexErr {
|
||||
fn from(err: SandboxTransformError) -> Self {
|
||||
match err {
|
||||
SandboxTransformError::MissingLinuxSandboxExecutable => {
|
||||
CodexErr::LandlockSandboxExecutableNotProvided
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// We don't have a fully deterministic way to tell if our command failed
/// because of the sandbox - a command in the user's zshrc file might hit an
/// error, but the command itself might fail or succeed for other reasons.
/// For now, we conservatively check for well known command failure exit codes and
/// also look for common sandbox denial keywords in the command output.
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool {
pub(crate) fn is_likely_sandbox_denied(
sandbox_type: SandboxType,
exec_output: &ExecToolCallOutput,
) -> bool {
if sandbox_type == SandboxType::None || exec_output.exit_code == 0 {
return false;
}
@@ -206,21 +254,17 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal
// 2: misuse of shell builtins
// 126: permission denied
// 127: command not found
const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
return false;
}

const SANDBOX_DENIED_KEYWORDS: [&str; 6] = [
const SANDBOX_DENIED_KEYWORDS: [&str; 7] = [
"operation not permitted",
"permission denied",
"read-only file system",
"seccomp",
"sandbox",
"landlock",
"failed to write file",
];

if [
let has_sandbox_keyword = [
&exec_output.stderr.text,
&exec_output.stdout.text,
&exec_output.aggregated_output.text,
@@ -231,10 +275,17 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal
SANDBOX_DENIED_KEYWORDS
.iter()
.any(|needle| lower.contains(needle))
}) {
});

if has_sandbox_keyword {
return true;
}

const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
return false;
}

#[cfg(unix)]
{
const SIGSYS_CODE: i32 = libc::SIGSYS;
@@ -248,11 +299,12 @@ fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCal
false
}
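Net effect of the reordering above: a sandbox keyword anywhere in stdout/stderr/aggregated output now wins over the quick-reject exit codes, so a `permission denied` message with exit code 126 is classified as a sandbox denial instead of being rejected early. A minimal standalone restatement of the new decision order, not the function itself, which takes a `SandboxType` and an `ExecToolCallOutput` and (judging by the `SIGSYS` constant) also checks for seccomp kills on Unix:

```rust
/// Illustrative restatement of the reworked ordering, for reading alongside the hunk above.
fn looks_like_sandbox_denial(exit_code: i32, combined_output: &str) -> bool {
    const SANDBOX_DENIED_KEYWORDS: [&str; 7] = [
        "operation not permitted",
        "permission denied",
        "read-only file system",
        "seccomp",
        "sandbox",
        "landlock",
        "failed to write file",
    ];
    // 1. Keyword match anywhere in the output => likely denied, regardless of exit code.
    let lower = combined_output.to_lowercase();
    if SANDBOX_DENIED_KEYWORDS.iter().any(|needle| lower.contains(needle)) {
        return true;
    }
    // 2. Otherwise, well-known shell failure codes are not blamed on the sandbox.
    const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
    if QUICK_REJECT_EXIT_CODES.contains(&exit_code) {
        return false;
    }
    // 3. Remaining non-zero exits fall through to the platform-specific checks.
    false
}

fn main() {
    // Exit 126 used to be quick-rejected before the keyword scan; now the message decides.
    assert!(looks_like_sandbox_denial(126, "bash: /usr/bin/touch: Permission denied"));
    assert!(!looks_like_sandbox_denial(127, "command not found: foo"));
}
```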
#[derive(Debug)]
|
||||
pub struct StreamOutput<T> {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct StreamOutput<T: Clone> {
|
||||
pub text: T,
|
||||
pub truncated_after_lines: Option<u32>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct RawExecToolCallOutput {
|
||||
pub exit_status: ExitStatus,
|
||||
@@ -285,7 +337,7 @@ fn append_all(dst: &mut Vec<u8>, src: &[u8]) {
|
||||
dst.extend_from_slice(src);
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ExecToolCallOutput {
|
||||
pub exit_code: i32,
|
||||
pub stdout: StreamOutput<String>,
|
||||
@@ -302,7 +354,11 @@ async fn exec(
|
||||
) -> Result<RawExecToolCallOutput> {
|
||||
let timeout = params.timeout_duration();
|
||||
let ExecParams {
|
||||
command, cwd, env, ..
|
||||
command,
|
||||
cwd,
|
||||
env,
|
||||
arg0,
|
||||
..
|
||||
} = params;
|
||||
|
||||
let (program, args) = command.split_first().ok_or_else(|| {
|
||||
@@ -311,11 +367,11 @@ async fn exec(
|
||||
"command args are empty",
|
||||
))
|
||||
})?;
|
||||
let arg0 = None;
|
||||
let arg0_ref = arg0.as_deref();
|
||||
let child = spawn_child_async(
|
||||
PathBuf::from(program),
|
||||
args.into(),
|
||||
arg0,
|
||||
arg0_ref,
|
||||
cwd,
|
||||
sandbox_policy,
|
||||
StdioPolicy::RedirectForShellTool,
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::exec_command::session_id::SessionId;
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
pub struct ExecCommandParams {
|
||||
pub(crate) cmd: String,
|
||||
|
||||
#[serde(default = "default_yield_time")]
|
||||
pub(crate) yield_time_ms: u64,
|
||||
|
||||
#[serde(default = "max_output_tokens")]
|
||||
pub(crate) max_output_tokens: u64,
|
||||
|
||||
#[serde(default = "default_shell")]
|
||||
pub(crate) shell: String,
|
||||
|
||||
#[serde(default = "default_login")]
|
||||
pub(crate) login: bool,
|
||||
}
|
||||
|
||||
fn default_yield_time() -> u64 {
|
||||
10_000
|
||||
}
|
||||
|
||||
fn max_output_tokens() -> u64 {
|
||||
10_000
|
||||
}
|
||||
|
||||
fn default_login() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn default_shell() -> String {
|
||||
"/bin/bash".to_string()
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct WriteStdinParams {
|
||||
pub(crate) session_id: SessionId,
|
||||
pub(crate) chars: String,
|
||||
|
||||
#[serde(default = "write_stdin_default_yield_time_ms")]
|
||||
pub(crate) yield_time_ms: u64,
|
||||
|
||||
#[serde(default = "write_stdin_default_max_output_tokens")]
|
||||
pub(crate) max_output_tokens: u64,
|
||||
}
|
||||
|
||||
fn write_stdin_default_yield_time_ms() -> u64 {
|
||||
250
|
||||
}
|
||||
|
||||
fn write_stdin_default_max_output_tokens() -> u64 {
|
||||
10_000
|
||||
}
|
||||
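`exec_command_params.rs` is deleted at this path in the rework, but the defaulting contract it encoded is worth spelling out: only `cmd` is required, and serde fills in a 10-second yield window, a 10,000-token output cap, `/bin/bash`, and a login shell. A standalone sketch that re-declares the struct locally purely for illustration:

```rust
use serde::Deserialize;

// Local re-declaration of the removed struct's defaulting behavior, for illustration only.
#[derive(Debug, Deserialize)]
struct ExecCommandParamsSketch {
    cmd: String,
    #[serde(default = "default_yield_time")]
    yield_time_ms: u64,
    #[serde(default = "max_output_tokens")]
    max_output_tokens: u64,
    #[serde(default = "default_shell")]
    shell: String,
    #[serde(default = "default_login")]
    login: bool,
}

fn default_yield_time() -> u64 { 10_000 }
fn max_output_tokens() -> u64 { 10_000 }
fn default_shell() -> String { "/bin/bash".to_string() }
fn default_login() -> bool { true }

fn main() {
    // A tool call that supplies only `cmd` picks up the defaults above.
    let params: ExecCommandParamsSketch =
        serde_json::from_str(r#"{ "cmd": "ls -la" }"#).expect("valid params");
    assert_eq!(params.yield_time_ms, 10_000);
    assert_eq!(params.max_output_tokens, 10_000);
    assert_eq!(params.shell, "/bin/bash");
    assert!(params.login);
}
```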
@@ -1,96 +0,0 @@
|
||||
use std::sync::Mutex as StdMutex;
|
||||
|
||||
use tokio::sync::broadcast;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::task::JoinHandle;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ExecCommandSession {
|
||||
/// Queue for writing bytes to the process stdin (PTY master write side).
|
||||
writer_tx: mpsc::Sender<Vec<u8>>,
|
||||
/// Broadcast stream of output chunks read from the PTY. New subscribers
|
||||
/// receive only chunks emitted after they subscribe.
|
||||
output_tx: broadcast::Sender<Vec<u8>>,
|
||||
|
||||
/// Child killer handle for termination on drop (can signal independently
|
||||
/// of a thread blocked in `.wait()`).
|
||||
killer: StdMutex<Option<Box<dyn portable_pty::ChildKiller + Send + Sync>>>,
|
||||
|
||||
/// JoinHandle for the blocking PTY reader task.
|
||||
reader_handle: StdMutex<Option<JoinHandle<()>>>,
|
||||
|
||||
/// JoinHandle for the stdin writer task.
|
||||
writer_handle: StdMutex<Option<JoinHandle<()>>>,
|
||||
|
||||
/// JoinHandle for the child wait task.
|
||||
wait_handle: StdMutex<Option<JoinHandle<()>>>,
|
||||
|
||||
/// Tracks whether the underlying process has exited.
|
||||
exit_status: std::sync::Arc<std::sync::atomic::AtomicBool>,
|
||||
}
|
||||
|
||||
impl ExecCommandSession {
|
||||
pub(crate) fn new(
|
||||
writer_tx: mpsc::Sender<Vec<u8>>,
|
||||
output_tx: broadcast::Sender<Vec<u8>>,
|
||||
killer: Box<dyn portable_pty::ChildKiller + Send + Sync>,
|
||||
reader_handle: JoinHandle<()>,
|
||||
writer_handle: JoinHandle<()>,
|
||||
wait_handle: JoinHandle<()>,
|
||||
exit_status: std::sync::Arc<std::sync::atomic::AtomicBool>,
|
||||
) -> (Self, broadcast::Receiver<Vec<u8>>) {
|
||||
let initial_output_rx = output_tx.subscribe();
|
||||
(
|
||||
Self {
|
||||
writer_tx,
|
||||
output_tx,
|
||||
killer: StdMutex::new(Some(killer)),
|
||||
reader_handle: StdMutex::new(Some(reader_handle)),
|
||||
writer_handle: StdMutex::new(Some(writer_handle)),
|
||||
wait_handle: StdMutex::new(Some(wait_handle)),
|
||||
exit_status,
|
||||
},
|
||||
initial_output_rx,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
|
||||
self.writer_tx.clone()
|
||||
}
|
||||
|
||||
pub(crate) fn output_receiver(&self) -> broadcast::Receiver<Vec<u8>> {
|
||||
self.output_tx.subscribe()
|
||||
}
|
||||
|
||||
pub(crate) fn has_exited(&self) -> bool {
|
||||
self.exit_status.load(std::sync::atomic::Ordering::SeqCst)
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for ExecCommandSession {
|
||||
fn drop(&mut self) {
|
||||
// Best-effort: terminate child first so blocking tasks can complete.
|
||||
if let Ok(mut killer_opt) = self.killer.lock()
|
||||
&& let Some(mut killer) = killer_opt.take()
|
||||
{
|
||||
let _ = killer.kill();
|
||||
}
|
||||
|
||||
// Abort background tasks; they may already have exited after kill.
|
||||
if let Ok(mut h) = self.reader_handle.lock()
|
||||
&& let Some(handle) = h.take()
|
||||
{
|
||||
handle.abort();
|
||||
}
|
||||
if let Ok(mut h) = self.writer_handle.lock()
|
||||
&& let Some(handle) = h.take()
|
||||
{
|
||||
handle.abort();
|
||||
}
|
||||
if let Ok(mut h) = self.wait_handle.lock()
|
||||
&& let Some(handle) = h.take()
|
||||
{
|
||||
handle.abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
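The doc comment on `output_tx` in the (removed) session struct above is the property the whole design leans on: a `tokio::sync::broadcast` subscriber only sees chunks sent after it subscribes, which is what lets `write_stdin` return output "from now" instead of replaying the backlog. A minimal standalone illustration of that semantics, independent of the session plumbing:

```rust
use tokio::sync::broadcast;

#[tokio::main]
async fn main() {
    let (tx, mut early_rx) = broadcast::channel::<Vec<u8>>(16);

    tx.send(b"before".to_vec()).unwrap();

    // Subscribing after the first send: this receiver never sees "before".
    let mut late_rx = tx.subscribe();
    tx.send(b"after".to_vec()).unwrap();

    assert_eq!(early_rx.recv().await.unwrap(), b"before".to_vec());
    assert_eq!(early_rx.recv().await.unwrap(), b"after".to_vec());
    assert_eq!(late_rx.recv().await.unwrap(), b"after".to_vec());
}
```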
@@ -1,14 +0,0 @@
|
||||
mod exec_command_params;
|
||||
mod exec_command_session;
|
||||
mod responses_api;
|
||||
mod session_id;
|
||||
mod session_manager;
|
||||
|
||||
pub use exec_command_params::ExecCommandParams;
|
||||
pub use exec_command_params::WriteStdinParams;
|
||||
pub(crate) use exec_command_session::ExecCommandSession;
|
||||
pub use responses_api::EXEC_COMMAND_TOOL_NAME;
|
||||
pub use responses_api::WRITE_STDIN_TOOL_NAME;
|
||||
pub use responses_api::create_exec_command_tool_for_responses_api;
|
||||
pub use responses_api::create_write_stdin_tool_for_responses_api;
|
||||
pub use session_manager::SessionManager as ExecSessionManager;
|
||||
@@ -1,98 +0,0 @@
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::openai_tools::JsonSchema;
|
||||
|
||||
pub const EXEC_COMMAND_TOOL_NAME: &str = "exec_command";
|
||||
pub const WRITE_STDIN_TOOL_NAME: &str = "write_stdin";
|
||||
|
||||
pub fn create_exec_command_tool_for_responses_api() -> ResponsesApiTool {
|
||||
let mut properties = BTreeMap::<String, JsonSchema>::new();
|
||||
properties.insert(
|
||||
"cmd".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("The shell command to execute.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"yield_time_ms".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The maximum time in milliseconds to wait for output.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"max_output_tokens".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The maximum number of tokens to output.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"shell".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("The shell to use. Defaults to \"/bin/bash\".".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"login".to_string(),
|
||||
JsonSchema::Boolean {
|
||||
description: Some(
|
||||
"Whether to run the command as a login shell. Defaults to true.".to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
|
||||
ResponsesApiTool {
|
||||
name: EXEC_COMMAND_TOOL_NAME.to_owned(),
|
||||
description: r#"Execute shell commands on the local machine with streaming output."#
|
||||
.to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["cmd".to_string()]),
|
||||
additional_properties: Some(false.into()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn create_write_stdin_tool_for_responses_api() -> ResponsesApiTool {
|
||||
let mut properties = BTreeMap::<String, JsonSchema>::new();
|
||||
properties.insert(
|
||||
"session_id".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The ID of the exec_command session.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"chars".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("The characters to write to stdin.".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"yield_time_ms".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some(
|
||||
"The maximum time in milliseconds to wait for output after writing.".to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"max_output_tokens".to_string(),
|
||||
JsonSchema::Number {
|
||||
description: Some("The maximum number of tokens to output.".to_string()),
|
||||
},
|
||||
);
|
||||
|
||||
ResponsesApiTool {
|
||||
name: WRITE_STDIN_TOOL_NAME.to_owned(),
|
||||
description: r#"Write characters to an exec session's stdin. Returns all stdout+stderr received within yield_time_ms.
|
||||
Can write control characters (\u0003 for Ctrl-C), or an empty string to just poll stdout+stderr."#
|
||||
.to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["session_id".to_string(), "chars".to_string()]),
|
||||
additional_properties: Some(false.into()),
|
||||
},
|
||||
}
|
||||
}
|
||||
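The `write_stdin` description above spells out the two call shapes: send control characters (e.g. `\u0003` for Ctrl-C), or an empty `chars` string to merely poll buffered stdout+stderr. A hypothetical pair of argument payloads, built with `serde_json` for illustration (the session id and timing values are invented):

```rust
use serde_json::json;

fn main() {
    // Interrupt whatever is running in session 3 and collect ~250ms of output.
    let interrupt = json!({
        "session_id": 3,
        "chars": "\u{0003}",   // Ctrl-C
        "yield_time_ms": 250
    });

    // Empty `chars`: write nothing, just poll buffered stdout+stderr.
    let poll = json!({
        "session_id": 3,
        "chars": "",
        "yield_time_ms": 1000,
        "max_output_tokens": 2000
    });

    println!("{interrupt}\n{poll}");
}
```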
@@ -1,5 +0,0 @@
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
pub(crate) struct SessionId(pub u32);
|
||||
@@ -1,506 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::io::ErrorKind;
|
||||
use std::io::Read;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex as StdMutex;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::AtomicU32;
|
||||
|
||||
use portable_pty::CommandBuilder;
|
||||
use portable_pty::PtySize;
|
||||
use portable_pty::native_pty_system;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::sync::oneshot;
|
||||
use tokio::time::Duration;
|
||||
use tokio::time::Instant;
|
||||
use tokio::time::timeout;
|
||||
|
||||
use crate::exec_command::exec_command_params::ExecCommandParams;
|
||||
use crate::exec_command::exec_command_params::WriteStdinParams;
|
||||
use crate::exec_command::exec_command_session::ExecCommandSession;
|
||||
use crate::exec_command::session_id::SessionId;
|
||||
use crate::truncate::truncate_middle;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct SessionManager {
|
||||
next_session_id: AtomicU32,
|
||||
sessions: Mutex<HashMap<SessionId, ExecCommandSession>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ExecCommandOutput {
|
||||
wall_time: Duration,
|
||||
exit_status: ExitStatus,
|
||||
original_token_count: Option<u64>,
|
||||
output: String,
|
||||
}
|
||||
|
||||
impl ExecCommandOutput {
|
||||
pub(crate) fn to_text_output(&self) -> String {
|
||||
let wall_time_secs = self.wall_time.as_secs_f32();
|
||||
let termination_status = match self.exit_status {
|
||||
ExitStatus::Exited(code) => format!("Process exited with code {code}"),
|
||||
ExitStatus::Ongoing(session_id) => {
|
||||
format!("Process running with session ID {}", session_id.0)
|
||||
}
|
||||
};
|
||||
let truncation_status = match self.original_token_count {
|
||||
Some(tokens) => {
|
||||
format!("\nWarning: truncated output (original token count: {tokens})")
|
||||
}
|
||||
None => "".to_string(),
|
||||
};
|
||||
format!(
|
||||
r#"Wall time: {wall_time_secs:.3} seconds
|
||||
{termination_status}{truncation_status}
|
||||
Output:
|
||||
{output}"#,
|
||||
output = self.output
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ExitStatus {
|
||||
Exited(i32),
|
||||
Ongoing(SessionId),
|
||||
}
|
||||
|
||||
impl SessionManager {
|
||||
/// Processes the request and is required to send a response via `outgoing`.
|
||||
pub async fn handle_exec_command_request(
|
||||
&self,
|
||||
params: ExecCommandParams,
|
||||
) -> Result<ExecCommandOutput, String> {
|
||||
// Allocate a session id.
|
||||
let session_id = SessionId(
|
||||
self.next_session_id
|
||||
.fetch_add(1, std::sync::atomic::Ordering::SeqCst),
|
||||
);
|
||||
|
||||
let (session, mut output_rx, mut exit_rx) = create_exec_command_session(params.clone())
|
||||
.await
|
||||
.map_err(|err| {
|
||||
format!(
|
||||
"failed to create exec command session for session id {}: {err}",
|
||||
session_id.0
|
||||
)
|
||||
})?;
|
||||
|
||||
// Insert into session map.
|
||||
self.sessions.lock().await.insert(session_id, session);
|
||||
|
||||
// Collect output until either timeout expires or process exits.
|
||||
// Do not cap during collection; truncate at the end if needed.
|
||||
// Use a modest initial capacity to avoid large preallocation.
|
||||
let cap_bytes_u64 = params.max_output_tokens.saturating_mul(4);
|
||||
let cap_bytes: usize = cap_bytes_u64.min(usize::MAX as u64) as usize;
|
||||
let mut collected: Vec<u8> = Vec::with_capacity(4096);
|
||||
|
||||
let start_time = Instant::now();
|
||||
let deadline = start_time + Duration::from_millis(params.yield_time_ms);
|
||||
let mut exit_code: Option<i32> = None;
|
||||
|
||||
loop {
|
||||
if Instant::now() >= deadline {
|
||||
break;
|
||||
}
|
||||
let remaining = deadline.saturating_duration_since(Instant::now());
|
||||
tokio::select! {
|
||||
biased;
|
||||
exit = &mut exit_rx => {
|
||||
exit_code = exit.ok();
|
||||
// Small grace period to pull remaining buffered output
|
||||
let grace_deadline = Instant::now() + Duration::from_millis(25);
|
||||
while Instant::now() < grace_deadline {
|
||||
match timeout(Duration::from_millis(1), output_rx.recv()).await {
|
||||
Ok(Ok(chunk)) => {
|
||||
collected.extend_from_slice(&chunk);
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => {
|
||||
// Skip missed messages; keep trying within grace period.
|
||||
continue;
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => break,
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
chunk = timeout(remaining, output_rx.recv()) => {
|
||||
match chunk {
|
||||
Ok(Ok(chunk)) => {
|
||||
collected.extend_from_slice(&chunk);
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => {
|
||||
// Skip missed messages; continue collecting fresh output.
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => { break; }
|
||||
Err(_) => { break; }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let output = String::from_utf8_lossy(&collected).to_string();
|
||||
|
||||
let exit_status = if let Some(code) = exit_code {
|
||||
ExitStatus::Exited(code)
|
||||
} else {
|
||||
ExitStatus::Ongoing(session_id)
|
||||
};
|
||||
|
||||
// If output exceeds cap, truncate the middle and record original token estimate.
|
||||
let (output, original_token_count) = truncate_middle(&output, cap_bytes);
|
||||
Ok(ExecCommandOutput {
|
||||
wall_time: Instant::now().duration_since(start_time),
|
||||
exit_status,
|
||||
original_token_count,
|
||||
output,
|
||||
})
|
||||
}
|
||||
|
||||
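Both collection paths cap output at `max_output_tokens * 4` bytes via `truncate_middle`, i.e. a rough 4-bytes-per-token estimate applied after collection rather than while reading. Worked numbers: the default budget of 10,000 tokens allows about 40,000 bytes, and the 16-token cap used by the truncation test below allows 64 bytes. A tiny sketch of the same arithmetic:

```rust
fn cap_bytes(max_output_tokens: u64) -> usize {
    // Mirrors the session manager: ~4 bytes per token, saturating, clamped to usize.
    max_output_tokens.saturating_mul(4).min(usize::MAX as u64) as usize
}

fn main() {
    assert_eq!(cap_bytes(10_000), 40_000); // default exec_command budget
    assert_eq!(cap_bytes(16), 64);         // the small cap used by the truncation test
    assert_eq!(cap_bytes(u64::MAX), usize::MAX); // saturates instead of overflowing
}
```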
/// Write characters to a session's stdin and collect combined output for up to `yield_time_ms`.
|
||||
pub async fn handle_write_stdin_request(
|
||||
&self,
|
||||
params: WriteStdinParams,
|
||||
) -> Result<ExecCommandOutput, String> {
|
||||
let WriteStdinParams {
|
||||
session_id,
|
||||
chars,
|
||||
yield_time_ms,
|
||||
max_output_tokens,
|
||||
} = params;
|
||||
|
||||
// Grab handles without holding the sessions lock across await points.
|
||||
let (writer_tx, mut output_rx) = {
|
||||
let sessions = self.sessions.lock().await;
|
||||
match sessions.get(&session_id) {
|
||||
Some(session) => (session.writer_sender(), session.output_receiver()),
|
||||
None => {
|
||||
return Err(format!("unknown session id {}", session_id.0));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Write stdin if provided.
|
||||
if !chars.is_empty() && writer_tx.send(chars.into_bytes()).await.is_err() {
|
||||
return Err("failed to write to stdin".to_string());
|
||||
}
|
||||
|
||||
// Collect output up to yield_time_ms, truncating to max_output_tokens bytes.
|
||||
let mut collected: Vec<u8> = Vec::with_capacity(4096);
|
||||
let start_time = Instant::now();
|
||||
let deadline = start_time + Duration::from_millis(yield_time_ms);
|
||||
loop {
|
||||
let now = Instant::now();
|
||||
if now >= deadline {
|
||||
break;
|
||||
}
|
||||
let remaining = deadline - now;
|
||||
match timeout(remaining, output_rx.recv()).await {
|
||||
Ok(Ok(chunk)) => {
|
||||
// Collect all output within the time budget; truncate at the end.
|
||||
collected.extend_from_slice(&chunk);
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Lagged(_))) => {
|
||||
// Skip missed messages; continue collecting fresh output.
|
||||
}
|
||||
Ok(Err(tokio::sync::broadcast::error::RecvError::Closed)) => break,
|
||||
Err(_) => break, // timeout
|
||||
}
|
||||
}
|
||||
|
||||
// Return structured output, truncating middle if over cap.
|
||||
let output = String::from_utf8_lossy(&collected).to_string();
|
||||
let cap_bytes_u64 = max_output_tokens.saturating_mul(4);
|
||||
let cap_bytes: usize = cap_bytes_u64.min(usize::MAX as u64) as usize;
|
||||
let (output, original_token_count) = truncate_middle(&output, cap_bytes);
|
||||
Ok(ExecCommandOutput {
|
||||
wall_time: Instant::now().duration_since(start_time),
|
||||
exit_status: ExitStatus::Ongoing(session_id),
|
||||
original_token_count,
|
||||
output,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Spawn PTY and child process per spawn_exec_command_session logic.
|
||||
async fn create_exec_command_session(
|
||||
params: ExecCommandParams,
|
||||
) -> anyhow::Result<(
|
||||
ExecCommandSession,
|
||||
tokio::sync::broadcast::Receiver<Vec<u8>>,
|
||||
oneshot::Receiver<i32>,
|
||||
)> {
|
||||
let ExecCommandParams {
|
||||
cmd,
|
||||
yield_time_ms: _,
|
||||
max_output_tokens: _,
|
||||
shell,
|
||||
login,
|
||||
} = params;
|
||||
|
||||
// Use the native pty implementation for the system
|
||||
let pty_system = native_pty_system();
|
||||
|
||||
// Create a new pty
|
||||
let pair = pty_system.openpty(PtySize {
|
||||
rows: 24,
|
||||
cols: 80,
|
||||
pixel_width: 0,
|
||||
pixel_height: 0,
|
||||
})?;
|
||||
|
||||
// Spawn a shell into the pty
|
||||
let mut command_builder = CommandBuilder::new(shell);
|
||||
let shell_mode_opt = if login { "-lc" } else { "-c" };
|
||||
command_builder.arg(shell_mode_opt);
|
||||
command_builder.arg(cmd);
|
||||
|
||||
let mut child = pair.slave.spawn_command(command_builder)?;
|
||||
// Obtain a killer that can signal the process independently of `.wait()`.
|
||||
let killer = child.clone_killer();
|
||||
|
||||
// Channel to forward write requests to the PTY writer.
|
||||
let (writer_tx, mut writer_rx) = mpsc::channel::<Vec<u8>>(128);
|
||||
// Broadcast for streaming PTY output to readers: subscribers receive from subscription time.
|
||||
let (output_tx, _) = tokio::sync::broadcast::channel::<Vec<u8>>(256);
|
||||
// Reader task: drain PTY and forward chunks to output channel.
|
||||
let mut reader = pair.master.try_clone_reader()?;
|
||||
let output_tx_clone = output_tx.clone();
|
||||
let reader_handle = tokio::task::spawn_blocking(move || {
|
||||
let mut buf = [0u8; 8192];
|
||||
loop {
|
||||
match reader.read(&mut buf) {
|
||||
Ok(0) => break, // EOF
|
||||
Ok(n) => {
|
||||
// Forward to broadcast; best-effort if there are subscribers.
|
||||
let _ = output_tx_clone.send(buf[..n].to_vec());
|
||||
}
|
||||
Err(ref e) if e.kind() == ErrorKind::Interrupted => {
|
||||
// Retry on EINTR
|
||||
continue;
|
||||
}
|
||||
Err(ref e) if e.kind() == ErrorKind::WouldBlock => {
|
||||
// We're in a blocking thread; back off briefly and retry.
|
||||
std::thread::sleep(Duration::from_millis(5));
|
||||
continue;
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Writer task: apply stdin writes to the PTY writer.
|
||||
let writer = pair.master.take_writer()?;
|
||||
let writer = Arc::new(StdMutex::new(writer));
|
||||
let writer_handle = tokio::spawn({
|
||||
let writer = writer.clone();
|
||||
async move {
|
||||
while let Some(bytes) = writer_rx.recv().await {
|
||||
let writer = writer.clone();
|
||||
// Perform blocking write on a blocking thread.
|
||||
let _ = tokio::task::spawn_blocking(move || {
|
||||
if let Ok(mut guard) = writer.lock() {
|
||||
use std::io::Write;
|
||||
let _ = guard.write_all(&bytes);
|
||||
let _ = guard.flush();
|
||||
}
|
||||
})
|
||||
.await;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Keep the child alive until it exits, then signal exit code.
|
||||
let (exit_tx, exit_rx) = oneshot::channel::<i32>();
|
||||
let exit_status = Arc::new(AtomicBool::new(false));
|
||||
let wait_exit_status = exit_status.clone();
|
||||
let wait_handle = tokio::task::spawn_blocking(move || {
|
||||
let code = match child.wait() {
|
||||
Ok(status) => status.exit_code() as i32,
|
||||
Err(_) => -1,
|
||||
};
|
||||
wait_exit_status.store(true, std::sync::atomic::Ordering::SeqCst);
|
||||
let _ = exit_tx.send(code);
|
||||
});
|
||||
|
||||
// Create and store the session with channels.
|
||||
let (session, initial_output_rx) = ExecCommandSession::new(
|
||||
writer_tx,
|
||||
output_tx,
|
||||
killer,
|
||||
reader_handle,
|
||||
writer_handle,
|
||||
wait_handle,
|
||||
exit_status,
|
||||
);
|
||||
Ok((session, initial_output_rx, exit_rx))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::exec_command::session_id::SessionId;
|
||||
|
||||
/// Test that verifies that [`SessionManager::handle_exec_command_request()`]
|
||||
/// and [`SessionManager::handle_write_stdin_request()`] work as expected
|
||||
/// in the presence of a process that never terminates (but produces
|
||||
/// output continuously).
|
||||
#[cfg(unix)]
|
||||
#[allow(clippy::print_stderr)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn session_manager_streams_and_truncates_from_now() {
|
||||
use crate::exec_command::exec_command_params::ExecCommandParams;
|
||||
use crate::exec_command::exec_command_params::WriteStdinParams;
|
||||
use tokio::time::sleep;
|
||||
|
||||
let session_manager = SessionManager::default();
|
||||
// Long-running loop that prints an increasing counter every ~100ms.
|
||||
// Use Python for a portable, reliable sleep across shells/PTYs.
|
||||
let cmd = r#"python3 - <<'PY'
|
||||
import sys, time
|
||||
count = 0
|
||||
while True:
|
||||
print(count)
|
||||
sys.stdout.flush()
|
||||
count += 100
|
||||
time.sleep(0.1)
|
||||
PY"#
|
||||
.to_string();
|
||||
|
||||
// Start the session and collect ~3s of output.
|
||||
let params = ExecCommandParams {
|
||||
cmd,
|
||||
yield_time_ms: 3_000,
|
||||
max_output_tokens: 1_000, // large enough to avoid truncation here
|
||||
shell: "/bin/bash".to_string(),
|
||||
login: false,
|
||||
};
|
||||
let initial_output = match session_manager
|
||||
.handle_exec_command_request(params.clone())
|
||||
.await
|
||||
{
|
||||
Ok(v) => v,
|
||||
Err(e) => {
|
||||
// PTY may be restricted in some sandboxes; skip in that case.
|
||||
if e.contains("openpty") || e.contains("Operation not permitted") {
|
||||
eprintln!("skipping test due to restricted PTY: {e}");
|
||||
return;
|
||||
}
|
||||
panic!("exec request failed unexpectedly: {e}");
|
||||
}
|
||||
};
|
||||
eprintln!("initial output: {initial_output:?}");
|
||||
|
||||
// Should be ongoing (we launched a never-ending loop).
|
||||
let session_id = match initial_output.exit_status {
|
||||
ExitStatus::Ongoing(id) => id,
|
||||
_ => panic!("expected ongoing session"),
|
||||
};
|
||||
|
||||
// Parse the numeric lines and get the max observed value in the first window.
|
||||
let first_nums = extract_monotonic_numbers(&initial_output.output);
|
||||
assert!(
|
||||
!first_nums.is_empty(),
|
||||
"expected some output from first window"
|
||||
);
|
||||
let first_max = *first_nums.iter().max().unwrap();
|
||||
|
||||
// Wait ~4s so counters progress while we're not reading.
|
||||
sleep(Duration::from_millis(4_000)).await;
|
||||
|
||||
// Now read ~3s of output "from now" only.
|
||||
// Use a small token cap so truncation occurs and we test middle truncation.
|
||||
let write_params = WriteStdinParams {
|
||||
session_id,
|
||||
chars: String::new(),
|
||||
yield_time_ms: 3_000,
|
||||
max_output_tokens: 16, // 16 tokens ~= 64 bytes -> likely truncation
|
||||
};
|
||||
let second = session_manager
|
||||
.handle_write_stdin_request(write_params)
|
||||
.await
|
||||
.expect("write stdin should succeed");
|
||||
|
||||
// Verify truncation metadata and size bound (cap is tokens*4 bytes).
|
||||
assert!(second.original_token_count.is_some());
|
||||
let cap_bytes = (16u64 * 4) as usize;
|
||||
assert!(second.output.len() <= cap_bytes);
|
||||
// New middle marker should be present.
|
||||
assert!(
|
||||
second.output.contains("tokens truncated") && second.output.contains('…'),
|
||||
"expected truncation marker in output, got: {}",
|
||||
second.output
|
||||
);
|
||||
|
||||
// Minimal freshness check: the earliest number we see in the second window
|
||||
// should be significantly larger than the last from the first window.
|
||||
let second_nums = extract_monotonic_numbers(&second.output);
|
||||
assert!(
|
||||
!second_nums.is_empty(),
|
||||
"expected some numeric output from second window"
|
||||
);
|
||||
let second_min = *second_nums.iter().min().unwrap();
|
||||
|
||||
// We slept 4 seconds (~40 ticks at 100ms/tick, each +100), so expect
|
||||
// an increase of roughly 4000 or more. Allow a generous margin.
|
||||
assert!(
|
||||
second_min >= first_max + 2000,
|
||||
"second_min={second_min} first_max={first_max}",
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn extract_monotonic_numbers(s: &str) -> Vec<i64> {
|
||||
s.lines()
|
||||
.filter_map(|line| {
|
||||
if !line.is_empty()
|
||||
&& line.chars().all(|c| c.is_ascii_digit())
|
||||
&& let Ok(n) = line.parse::<i64>()
|
||||
{
|
||||
// Our generator increments by 100; ignore spurious fragments.
|
||||
if n % 100 == 0 {
|
||||
return Some(n);
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn to_text_output_exited_no_truncation() {
|
||||
let out = ExecCommandOutput {
|
||||
wall_time: Duration::from_millis(1234),
|
||||
exit_status: ExitStatus::Exited(0),
|
||||
original_token_count: None,
|
||||
output: "hello".to_string(),
|
||||
};
|
||||
let text = out.to_text_output();
|
||||
let expected = r#"Wall time: 1.234 seconds
|
||||
Process exited with code 0
|
||||
Output:
|
||||
hello"#;
|
||||
assert_eq!(expected, text);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn to_text_output_ongoing_with_truncation() {
|
||||
let out = ExecCommandOutput {
|
||||
wall_time: Duration::from_millis(500),
|
||||
exit_status: ExitStatus::Ongoing(SessionId(42)),
|
||||
original_token_count: Some(1000),
|
||||
output: "abc".to_string(),
|
||||
};
|
||||
let text = out.to_text_output();
|
||||
let expected = r#"Wall time: 0.500 seconds
|
||||
Process running with session ID 42
|
||||
Warning: truncated output (original token count: 1000)
|
||||
Output:
|
||||
abc"#;
|
||||
assert_eq!(expected, text);
|
||||
}
|
||||
}
|
||||
@@ -1,109 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::CODEX_APPLY_PATCH_ARG1;
|
||||
use crate::apply_patch::ApplyPatchExec;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::executor::ExecutorConfig;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
|
||||
pub(crate) enum ExecutionMode {
|
||||
Shell,
|
||||
ApplyPatch(ApplyPatchExec),
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
/// Backend-specific hooks that prepare and post-process execution requests for a
|
||||
/// given [`ExecutionMode`].
|
||||
pub(crate) trait ExecutionBackend: Send + Sync {
|
||||
fn prepare(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
// Required for downcasting the apply_patch.
|
||||
mode: &ExecutionMode,
|
||||
config: &ExecutorConfig,
|
||||
) -> Result<ExecParams, FunctionCallError>;
|
||||
|
||||
fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
static SHELL_BACKEND: ShellBackend = ShellBackend;
|
||||
static APPLY_PATCH_BACKEND: ApplyPatchBackend = ApplyPatchBackend;
|
||||
|
||||
pub(crate) fn backend_for_mode(mode: &ExecutionMode) -> &'static dyn ExecutionBackend {
|
||||
match mode {
|
||||
ExecutionMode::Shell => &SHELL_BACKEND,
|
||||
ExecutionMode::ApplyPatch(_) => &APPLY_PATCH_BACKEND,
|
||||
}
|
||||
}
|
||||
|
||||
struct ShellBackend;
|
||||
|
||||
#[async_trait]
|
||||
impl ExecutionBackend for ShellBackend {
|
||||
fn prepare(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
mode: &ExecutionMode,
|
||||
_config: &ExecutorConfig,
|
||||
) -> Result<ExecParams, FunctionCallError> {
|
||||
match mode {
|
||||
ExecutionMode::Shell => Ok(params),
|
||||
_ => Err(FunctionCallError::RespondToModel(
|
||||
"shell backend invoked with non-shell mode".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ApplyPatchBackend;
|
||||
|
||||
#[async_trait]
|
||||
impl ExecutionBackend for ApplyPatchBackend {
|
||||
fn prepare(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
mode: &ExecutionMode,
|
||||
config: &ExecutorConfig,
|
||||
) -> Result<ExecParams, FunctionCallError> {
|
||||
match mode {
|
||||
ExecutionMode::ApplyPatch(exec) => {
|
||||
let path_to_codex = if let Some(exe_path) = &config.codex_exe {
|
||||
exe_path.to_string_lossy().to_string()
|
||||
} else {
|
||||
env::current_exe()
|
||||
.ok()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.ok_or_else(|| {
|
||||
FunctionCallError::RespondToModel(
|
||||
"failed to determine path to codex executable".to_string(),
|
||||
)
|
||||
})?
|
||||
};
|
||||
|
||||
let patch = exec.action.patch.clone();
|
||||
Ok(ExecParams {
|
||||
command: vec![path_to_codex, CODEX_APPLY_PATCH_ARG1.to_string(), patch],
|
||||
cwd: exec.action.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
// Run apply_patch with a minimal environment for determinism and to
|
||||
// avoid leaking host environment variables into the patch process.
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
})
|
||||
}
|
||||
ExecutionMode::Shell => Err(FunctionCallError::RespondToModel(
|
||||
"apply_patch backend invoked without patch context".to_string(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
|
||||
false
|
||||
}
|
||||
}
|
||||
@@ -1,51 +0,0 @@
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
|
||||
#[derive(Clone, Debug, Default)]
|
||||
/// Thread-safe store of user approvals so repeated commands can reuse
|
||||
/// previously granted trust.
|
||||
pub(crate) struct ApprovalCache {
|
||||
inner: Arc<Mutex<HashSet<Vec<String>>>>,
|
||||
}
|
||||
|
||||
impl ApprovalCache {
|
||||
pub(crate) fn insert(&self, command: Vec<String>) {
|
||||
if command.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Ok(mut guard) = self.inner.lock() {
|
||||
guard.insert(command);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn snapshot(&self) -> HashSet<Vec<String>> {
|
||||
self.inner.lock().map(|g| g.clone()).unwrap_or_default()
|
||||
}
|
||||
}
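A quick usage sketch of the insert/snapshot contract, with the type re-declared in simplified form so the snippet stands alone (the real cache is crate-private and lives inside the executor):

use std::collections::HashSet;
use std::sync::{Arc, Mutex};

// Simplified stand-in mirroring the ApprovalCache shown above.
#[derive(Clone, Default)]
struct ApprovalCache {
    inner: Arc<Mutex<HashSet<Vec<String>>>>,
}

impl ApprovalCache {
    fn insert(&self, command: Vec<String>) {
        if command.is_empty() {
            return;
        }
        if let Ok(mut guard) = self.inner.lock() {
            guard.insert(command);
        }
    }

    fn snapshot(&self) -> HashSet<Vec<String>> {
        self.inner.lock().map(|g| g.clone()).unwrap_or_default()
    }
}

fn main() {
    let cache = ApprovalCache::default();
    let shared = cache.clone(); // clones share the same underlying set via Arc

    let cmd = vec!["git".to_string(), "status".to_string()];
    cache.insert(cmd.clone());

    // A later, identical command can consult the snapshot and skip re-prompting.
    assert!(shared.snapshot().contains(&cmd));
}

Because clones share the same `Arc`, any task holding a clone sees approvals recorded by the others.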
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
fn insert_ignores_empty_and_dedupes() {
|
||||
let cache = ApprovalCache::default();
|
||||
|
||||
// Empty should be ignored
|
||||
cache.insert(vec![]);
|
||||
assert!(cache.snapshot().is_empty());
|
||||
|
||||
// Insert a command and verify snapshot contains it
|
||||
let cmd = vec!["foo".to_string(), "bar".to_string()];
|
||||
cache.insert(cmd.clone());
|
||||
let snap1 = cache.snapshot();
|
||||
assert!(snap1.contains(&cmd));
|
||||
|
||||
// Reinserting should not create duplicates
|
||||
cache.insert(cmd);
|
||||
let snap2 = cache.snapshot();
|
||||
assert_eq!(snap1, snap2);
|
||||
}
|
||||
}
|
||||
@@ -1,68 +0,0 @@
|
||||
mod backends;
|
||||
mod cache;
|
||||
mod runner;
|
||||
mod sandbox;
|
||||
|
||||
pub(crate) use backends::ExecutionMode;
|
||||
pub(crate) use runner::ExecutionRequest;
|
||||
pub(crate) use runner::Executor;
|
||||
pub(crate) use runner::ExecutorConfig;
|
||||
pub(crate) use runner::normalize_exec_result;
|
||||
|
||||
pub(crate) mod linkers {
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::executor::backends::ExecutionMode;
|
||||
use crate::executor::runner::ExecutionRequest;
|
||||
use crate::tools::context::ExecCommandContext;
|
||||
|
||||
pub struct PreparedExec {
|
||||
pub(crate) context: ExecCommandContext,
|
||||
pub(crate) request: ExecutionRequest,
|
||||
}
|
||||
|
||||
impl PreparedExec {
|
||||
pub fn new(
|
||||
context: ExecCommandContext,
|
||||
params: ExecParams,
|
||||
approval_command: Vec<String>,
|
||||
mode: ExecutionMode,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
use_shell_profile: bool,
|
||||
) -> Self {
|
||||
let request = ExecutionRequest {
|
||||
params,
|
||||
approval_command,
|
||||
mode,
|
||||
stdout_stream,
|
||||
use_shell_profile,
|
||||
};
|
||||
|
||||
Self { context, request }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub mod errors {
|
||||
use crate::error::CodexErr;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ExecError {
|
||||
#[error(transparent)]
|
||||
Function(#[from] FunctionCallError),
|
||||
#[error(transparent)]
|
||||
Codex(#[from] CodexErr),
|
||||
}
|
||||
|
||||
impl ExecError {
|
||||
pub(crate) fn rejection(msg: impl Into<String>) -> Self {
|
||||
FunctionCallError::RespondToModel(msg.into()).into()
|
||||
}
|
||||
|
||||
pub(crate) fn denied(msg: impl Into<String>) -> Self {
|
||||
FunctionCallError::Denied(msg.into()).into()
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,433 +0,0 @@
|
||||
use std::future::Future;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::RwLock;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::backends::ExecutionMode;
|
||||
use super::backends::backend_for_mode;
|
||||
use super::cache::ApprovalCache;
|
||||
use crate::codex::Session;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::error::get_error_message_ui;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::exec::StreamOutput;
|
||||
use crate::exec::process_exec_tool_call;
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::executor::sandbox::select_sandbox;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::shell;
|
||||
use crate::tools::context::ExecCommandContext;
|
||||
use codex_otel::otel_event_manager::ToolDecisionSource;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct ExecutorConfig {
|
||||
pub(crate) sandbox_policy: SandboxPolicy,
|
||||
pub(crate) sandbox_cwd: PathBuf,
|
||||
pub(crate) codex_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl ExecutorConfig {
|
||||
pub(crate) fn new(
|
||||
sandbox_policy: SandboxPolicy,
|
||||
sandbox_cwd: PathBuf,
|
||||
codex_exe: Option<PathBuf>,
|
||||
) -> Self {
|
||||
Self {
|
||||
sandbox_policy,
|
||||
sandbox_cwd,
|
||||
codex_exe,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Coordinates sandbox selection, backend-specific preparation, and command
|
||||
/// execution for tool calls requested by the model.
|
||||
pub(crate) struct Executor {
|
||||
approval_cache: ApprovalCache,
|
||||
config: Arc<RwLock<ExecutorConfig>>,
|
||||
}
|
||||
|
||||
impl Executor {
|
||||
pub(crate) fn new(config: ExecutorConfig) -> Self {
|
||||
Self {
|
||||
approval_cache: ApprovalCache::default(),
|
||||
config: Arc::new(RwLock::new(config)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Updates the sandbox policy and working directory used for future
|
||||
/// executions without recreating the executor.
|
||||
pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) {
|
||||
if let Ok(mut cfg) = self.config.write() {
|
||||
cfg.sandbox_policy = sandbox_policy;
|
||||
cfg.sandbox_cwd = sandbox_cwd;
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs a prepared execution request end-to-end: prepares parameters, decides on
|
||||
/// sandbox placement (prompting the user when necessary), launches the command,
|
||||
/// and lets the backend post-process the final output.
|
||||
pub(crate) async fn run<F, Fut>(
|
||||
&self,
|
||||
mut request: ExecutionRequest,
|
||||
session: &Session,
|
||||
approval_policy: AskForApproval,
|
||||
context: &ExecCommandContext,
|
||||
on_exec_begin: F,
|
||||
) -> Result<ExecToolCallOutput, ExecError>
|
||||
where
|
||||
F: FnOnce() -> Fut,
|
||||
Fut: Future<Output = ()>,
|
||||
{
|
||||
if matches!(request.mode, ExecutionMode::Shell) {
|
||||
request.params =
|
||||
maybe_translate_shell_command(request.params, session, request.use_shell_profile);
|
||||
}
|
||||
|
||||
// Step 1: Snapshot sandbox configuration so it stays stable for this run.
|
||||
let config = self
|
||||
.config
|
||||
.read()
|
||||
.map_err(|_| ExecError::rejection("executor config poisoned"))?
|
||||
.clone();
|
||||
|
||||
// Step 2: Normalise parameters via the selected backend.
|
||||
let backend = backend_for_mode(&request.mode);
|
||||
let stdout_stream = if backend.stream_stdout(&request.mode) {
|
||||
request.stdout_stream.clone()
|
||||
} else {
|
||||
None
|
||||
};
|
||||
request.params = backend
|
||||
.prepare(request.params, &request.mode, &config)
|
||||
.map_err(ExecError::from)?;
|
||||
|
||||
// Step 3: Decide sandbox placement, prompting for approval when needed.
|
||||
let sandbox_decision = select_sandbox(
|
||||
&request,
|
||||
approval_policy,
|
||||
self.approval_cache.snapshot(),
|
||||
&config,
|
||||
session,
|
||||
&context.sub_id,
|
||||
&context.call_id,
|
||||
&context.otel_event_manager,
|
||||
)
|
||||
.await?;
|
||||
if sandbox_decision.record_session_approval {
|
||||
self.approval_cache.insert(request.approval_command.clone());
|
||||
}
|
||||
on_exec_begin().await;
|
||||
// Step 4: Launch the command within the chosen sandbox.
|
||||
let first_attempt = self
|
||||
.spawn(
|
||||
request.params.clone(),
|
||||
sandbox_decision.initial_sandbox,
|
||||
&config,
|
||||
stdout_stream.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
// Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry.
|
||||
match first_attempt {
|
||||
Ok(output) => Ok(output),
|
||||
Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => {
|
||||
Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into())
|
||||
}
|
||||
Err(CodexErr::Sandbox(error)) => {
|
||||
if sandbox_decision.escalate_on_failure {
|
||||
self.retry_without_sandbox(
|
||||
&request,
|
||||
&config,
|
||||
session,
|
||||
context,
|
||||
stdout_stream,
|
||||
error,
|
||||
)
|
||||
.await
|
||||
} else {
|
||||
let message = sandbox_failure_message(error);
|
||||
Err(ExecError::rejection(message))
|
||||
}
|
||||
}
|
||||
Err(err) => Err(err.into()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Fallback path invoked when a sandboxed run is denied so the user can
|
||||
/// approve rerunning without isolation.
|
||||
async fn retry_without_sandbox(
|
||||
&self,
|
||||
request: &ExecutionRequest,
|
||||
config: &ExecutorConfig,
|
||||
session: &Session,
|
||||
context: &ExecCommandContext,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
sandbox_error: SandboxErr,
|
||||
) -> Result<ExecToolCallOutput, ExecError> {
|
||||
session
|
||||
.notify_background_event(
|
||||
&context.sub_id,
|
||||
format!("Execution failed: {sandbox_error}"),
|
||||
)
|
||||
.await;
|
||||
let decision = session
|
||||
.request_command_approval(
|
||||
context.sub_id.to_string(),
|
||||
context.call_id.to_string(),
|
||||
request.approval_command.clone(),
|
||||
request.params.cwd.clone(),
|
||||
Some("command failed; retry without sandbox?".to_string()),
|
||||
)
|
||||
.await;
|
||||
|
||||
context.otel_event_manager.tool_decision(
|
||||
&context.tool_name,
|
||||
&context.call_id,
|
||||
decision,
|
||||
ToolDecisionSource::User,
|
||||
);
|
||||
match decision {
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {
|
||||
if matches!(decision, ReviewDecision::ApprovedForSession) {
|
||||
self.approval_cache.insert(request.approval_command.clone());
|
||||
}
|
||||
session
|
||||
.notify_background_event(&context.sub_id, "retrying command without sandbox")
|
||||
.await;
|
||||
|
||||
let retry_output = self
|
||||
.spawn(
|
||||
request.params.clone(),
|
||||
SandboxType::None,
|
||||
config,
|
||||
stdout_stream,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(retry_output)
|
||||
}
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
Err(ExecError::denied("exec command rejected by user"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn spawn(
|
||||
&self,
|
||||
params: ExecParams,
|
||||
sandbox: SandboxType,
|
||||
config: &ExecutorConfig,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> Result<ExecToolCallOutput, CodexErr> {
|
||||
process_exec_tool_call(
|
||||
params,
|
||||
sandbox,
|
||||
&config.sandbox_policy,
|
||||
&config.sandbox_cwd,
|
||||
&config.codex_exe,
|
||||
stdout_stream,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
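The `run`/`retry_without_sandbox` pair above implements a "try sandboxed first, then ask the user and retry unsandboxed" loop. Below is a compressed, synchronous sketch of that control flow using invented stand-ins (`run_once`, `ask_user`, `RunError`) rather than the crate's real `Session`/`ExecCommandContext` plumbing:

// Stand-in types; the real code threads Session, ExecCommandContext, etc.
#[derive(Debug)]
enum RunError {
    Sandbox(String), // the sandbox blocked the command
    Other(String),
}

fn run_once(cmd: &[String], sandboxed: bool) -> Result<String, RunError> {
    // Placeholder: pretend every sandboxed run of "write-file" is denied.
    if sandboxed && cmd.first().map(String::as_str) == Some("write-file") {
        Err(RunError::Sandbox("write denied".to_string()))
    } else {
        Ok(format!("ran {cmd:?} (sandboxed: {sandboxed})"))
    }
}

fn ask_user(_cmd: &[String]) -> bool {
    // Placeholder for the interactive approval prompt.
    true
}

fn run_with_escalation(cmd: &[String], escalate_on_failure: bool) -> Result<String, RunError> {
    match run_once(cmd, true) {
        Ok(out) => Ok(out),
        Err(RunError::Sandbox(reason)) if escalate_on_failure && ask_user(cmd) => {
            eprintln!("sandboxed attempt failed ({reason}); retrying without sandbox");
            run_once(cmd, false)
        }
        Err(err) => Err(err),
    }
}

fn main() {
    let cmd = vec!["write-file".to_string(), "out.txt".to_string()];
    println!("{:?}", run_with_escalation(&cmd, true));
}

The real implementation additionally surfaces the failure as a background event before prompting and records `ApprovedForSession` decisions in the approval cache.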
|
||||
|
||||
fn maybe_translate_shell_command(
|
||||
params: ExecParams,
|
||||
session: &Session,
|
||||
use_shell_profile: bool,
|
||||
) -> ExecParams {
|
||||
let should_translate =
|
||||
matches!(session.user_shell(), shell::Shell::PowerShell(_)) || use_shell_profile;
|
||||
|
||||
if should_translate
|
||||
&& let Some(command) = session
|
||||
.user_shell()
|
||||
.format_default_shell_invocation(params.command.clone())
|
||||
{
|
||||
return ExecParams { command, ..params };
|
||||
}
|
||||
|
||||
params
|
||||
}
|
||||
|
||||
fn sandbox_failure_message(error: SandboxErr) -> String {
|
||||
let codex_error = CodexErr::Sandbox(error);
|
||||
let friendly = get_error_message_ui(&codex_error);
|
||||
format!("failed in sandbox: {friendly}")
|
||||
}
|
||||
|
||||
pub(crate) struct ExecutionRequest {
|
||||
pub params: ExecParams,
|
||||
pub approval_command: Vec<String>,
|
||||
pub mode: ExecutionMode,
|
||||
pub stdout_stream: Option<StdoutStream>,
|
||||
pub use_shell_profile: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct NormalizedExecOutput<'a> {
|
||||
borrowed: Option<&'a ExecToolCallOutput>,
|
||||
synthetic: Option<ExecToolCallOutput>,
|
||||
}
|
||||
|
||||
impl<'a> NormalizedExecOutput<'a> {
|
||||
pub(crate) fn event_output(&'a self) -> &'a ExecToolCallOutput {
|
||||
match (self.borrowed, self.synthetic.as_ref()) {
|
||||
(Some(output), _) => output,
|
||||
(None, Some(output)) => output,
|
||||
(None, None) => unreachable!("normalized exec output missing data"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts a raw execution result into a uniform view that always exposes an
|
||||
/// [`ExecToolCallOutput`], synthesizing error output when the command fails
|
||||
/// before producing a response.
|
||||
pub(crate) fn normalize_exec_result(
|
||||
result: &Result<ExecToolCallOutput, ExecError>,
|
||||
) -> NormalizedExecOutput<'_> {
|
||||
match result {
|
||||
Ok(output) => NormalizedExecOutput {
|
||||
borrowed: Some(output),
|
||||
synthetic: None,
|
||||
},
|
||||
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => {
|
||||
NormalizedExecOutput {
|
||||
borrowed: Some(output.as_ref()),
|
||||
synthetic: None,
|
||||
}
|
||||
}
|
||||
Err(err) => {
|
||||
let message = match err {
|
||||
ExecError::Function(FunctionCallError::RespondToModel(msg))
|
||||
| ExecError::Function(FunctionCallError::Denied(msg)) => msg.clone(),
|
||||
ExecError::Codex(e) => get_error_message_ui(e),
|
||||
err => err.to_string(),
|
||||
};
|
||||
let synthetic = ExecToolCallOutput {
|
||||
exit_code: -1,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(message.clone()),
|
||||
aggregated_output: StreamOutput::new(message),
|
||||
duration: Duration::default(),
|
||||
timed_out: false,
|
||||
};
|
||||
NormalizedExecOutput {
|
||||
borrowed: None,
|
||||
synthetic: Some(synthetic),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
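`NormalizedExecOutput` is essentially a hand-rolled borrow-or-own wrapper. The same shape can be expressed with `std::borrow::Cow`, shown here purely as an illustrative alternative, not as the crate's actual code:

use std::borrow::Cow;

#[derive(Clone, Debug)]
struct Output {
    exit_code: i32,
    text: String,
}

// Borrow the real output when we have one, otherwise synthesize an error payload.
fn normalize(result: &Result<Output, String>) -> Cow<'_, Output> {
    match result {
        Ok(output) => Cow::Borrowed(output),
        Err(message) => Cow::Owned(Output {
            exit_code: -1,
            text: message.clone(),
        }),
    }
}

fn main() {
    let ok: Result<Output, String> = Ok(Output { exit_code: 0, text: "hi".into() });
    let err: Result<Output, String> = Err("boom".into());
    assert_eq!(normalize(&ok).exit_code, 0);
    assert_eq!(normalize(&err).text, "boom");
}

One plausible reason for the hand-rolled struct is that `Cow` would require a `ToOwned`/`Clone` bound on `ExecToolCallOutput`, which the borrowed-plus-synthetic field pair avoids.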
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::EnvVarError;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::StreamOutput;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn make_output(text: &str) -> ExecToolCallOutput {
|
||||
ExecToolCallOutput {
|
||||
exit_code: 1,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(text.to_string()),
|
||||
duration: Duration::from_millis(123),
|
||||
timed_out: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_success_borrows() {
|
||||
let out = make_output("ok");
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Ok(out);
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert_eq!(normalized.event_output().aggregated_output.text, "ok");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_timeout_borrows_embedded_output() {
|
||||
let out = make_output("timed out payload");
|
||||
let err = CodexErr::Sandbox(SandboxErr::Timeout {
|
||||
output: Box::new(out),
|
||||
});
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(err));
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert_eq!(
|
||||
normalized.event_output().aggregated_output.text,
|
||||
"timed out payload"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_failure_message_uses_denied_stderr() {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code: 101,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new("sandbox stderr".to_string()),
|
||||
aggregated_output: StreamOutput::new(String::new()),
|
||||
duration: Duration::from_millis(10),
|
||||
timed_out: false,
|
||||
};
|
||||
let err = SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
};
|
||||
let message = sandbox_failure_message(err);
|
||||
assert_eq!(message, "failed in sandbox: sandbox stderr");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_failure_message_falls_back_to_aggregated_output() {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code: 101,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new("aggregate text".to_string()),
|
||||
duration: Duration::from_millis(10),
|
||||
timed_out: false,
|
||||
};
|
||||
let err = SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
};
|
||||
let message = sandbox_failure_message(err);
|
||||
assert_eq!(message, "failed in sandbox: aggregate text");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_function_error_synthesizes_payload() {
|
||||
let err = FunctionCallError::RespondToModel("boom".to_string());
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Function(err));
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert_eq!(normalized.event_output().aggregated_output.text, "boom");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_codex_error_synthesizes_user_message() {
|
||||
// Use a simple EnvVar error which formats to a clear message
|
||||
let e = CodexErr::EnvVar(EnvVarError {
|
||||
var: "FOO".to_string(),
|
||||
instructions: Some("set it".to_string()),
|
||||
});
|
||||
let result: Result<ExecToolCallOutput, ExecError> = Err(ExecError::Codex(e));
|
||||
let normalized = normalize_exec_result(&result);
|
||||
assert!(
|
||||
normalized
|
||||
.event_output()
|
||||
.aggregated_output
|
||||
.text
|
||||
.contains("Missing environment variable: `FOO`"),
|
||||
"expected synthesized user-friendly message"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,405 +0,0 @@
|
||||
use crate::apply_patch::ApplyPatchExec;
|
||||
use crate::codex::Session;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::executor::ExecutionMode;
|
||||
use crate::executor::ExecutionRequest;
|
||||
use crate::executor::ExecutorConfig;
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::safety::SafetyCheck;
|
||||
use crate::safety::assess_command_safety;
|
||||
use crate::safety::assess_patch_safety;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_otel::otel_event_manager::ToolDecisionSource;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Sandbox placement options selected for an execution run, including whether
|
||||
/// to escalate after failures and whether approvals should persist.
|
||||
pub(crate) struct SandboxDecision {
|
||||
pub(crate) initial_sandbox: SandboxType,
|
||||
pub(crate) escalate_on_failure: bool,
|
||||
pub(crate) record_session_approval: bool,
|
||||
}
|
||||
|
||||
impl SandboxDecision {
|
||||
fn auto(sandbox: SandboxType, escalate_on_failure: bool) -> Self {
|
||||
Self {
|
||||
initial_sandbox: sandbox,
|
||||
escalate_on_failure,
|
||||
record_session_approval: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn user_override(record_session_approval: bool) -> Self {
|
||||
Self {
|
||||
initial_sandbox: SandboxType::None,
|
||||
escalate_on_failure: false,
|
||||
record_session_approval,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> bool {
|
||||
matches!(
|
||||
(approval, sandbox),
|
||||
(
|
||||
AskForApproval::UnlessTrusted | AskForApproval::OnFailure,
|
||||
SandboxType::MacosSeatbelt | SandboxType::LinuxSeccomp
|
||||
)
|
||||
)
|
||||
}
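`should_escalate_on_failure` is a small policy-by-sandbox truth table. Here is a standalone restatement with stand-in enums, just to make the matrix and its edge cases explicit:

// Stand-in enums mirroring the shape of AskForApproval and SandboxType.
#[derive(Clone, Copy)]
enum Approval {
    UnlessTrusted,
    OnFailure,
    OnRequest,
    Never,
}

#[derive(Clone, Copy)]
enum Sandbox {
    MacosSeatbelt,
    LinuxSeccomp,
    None,
}

fn should_escalate(approval: Approval, sandbox: Sandbox) -> bool {
    // Escalate only when the policy allows failure-driven prompts *and* the
    // first attempt actually ran inside a platform sandbox.
    matches!(
        (approval, sandbox),
        (
            Approval::UnlessTrusted | Approval::OnFailure,
            Sandbox::MacosSeatbelt | Sandbox::LinuxSeccomp
        )
    )
}

fn main() {
    assert!(should_escalate(Approval::OnFailure, Sandbox::LinuxSeccomp));
    assert!(!should_escalate(Approval::Never, Sandbox::MacosSeatbelt));
    assert!(!should_escalate(Approval::OnFailure, Sandbox::None));
}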
|
||||
|
||||
/// Determines how a command should be sandboxed, prompting the user when
|
||||
/// policy requires explicit approval.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub async fn select_sandbox(
|
||||
request: &ExecutionRequest,
|
||||
approval_policy: AskForApproval,
|
||||
approval_cache: HashSet<Vec<String>>,
|
||||
config: &ExecutorConfig,
|
||||
session: &Session,
|
||||
sub_id: &str,
|
||||
call_id: &str,
|
||||
otel_event_manager: &OtelEventManager,
|
||||
) -> Result<SandboxDecision, ExecError> {
|
||||
match &request.mode {
|
||||
ExecutionMode::Shell => {
|
||||
select_shell_sandbox(
|
||||
request,
|
||||
approval_policy,
|
||||
approval_cache,
|
||||
config,
|
||||
session,
|
||||
sub_id,
|
||||
call_id,
|
||||
otel_event_manager,
|
||||
)
|
||||
.await
|
||||
}
|
||||
ExecutionMode::ApplyPatch(exec) => {
|
||||
select_apply_patch_sandbox(exec, approval_policy, config)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn select_shell_sandbox(
|
||||
request: &ExecutionRequest,
|
||||
approval_policy: AskForApproval,
|
||||
approved_snapshot: HashSet<Vec<String>>,
|
||||
config: &ExecutorConfig,
|
||||
session: &Session,
|
||||
sub_id: &str,
|
||||
call_id: &str,
|
||||
otel_event_manager: &OtelEventManager,
|
||||
) -> Result<SandboxDecision, ExecError> {
|
||||
let command_for_safety = if request.approval_command.is_empty() {
|
||||
request.params.command.clone()
|
||||
} else {
|
||||
request.approval_command.clone()
|
||||
};
|
||||
|
||||
let safety = assess_command_safety(
|
||||
&command_for_safety,
|
||||
approval_policy,
|
||||
&config.sandbox_policy,
|
||||
&approved_snapshot,
|
||||
request.params.with_escalated_permissions.unwrap_or(false),
|
||||
);
|
||||
|
||||
match safety {
|
||||
SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved,
|
||||
} => {
|
||||
let mut decision = SandboxDecision::auto(
|
||||
sandbox_type,
|
||||
should_escalate_on_failure(approval_policy, sandbox_type),
|
||||
);
|
||||
if user_explicitly_approved {
|
||||
decision.record_session_approval = true;
|
||||
}
|
||||
let (decision_for_event, source) = if user_explicitly_approved {
|
||||
(ReviewDecision::ApprovedForSession, ToolDecisionSource::User)
|
||||
} else {
|
||||
(ReviewDecision::Approved, ToolDecisionSource::Config)
|
||||
};
|
||||
otel_event_manager.tool_decision("local_shell", call_id, decision_for_event, source);
|
||||
Ok(decision)
|
||||
}
|
||||
SafetyCheck::AskUser => {
|
||||
let decision = session
|
||||
.request_command_approval(
|
||||
sub_id.to_string(),
|
||||
call_id.to_string(),
|
||||
request.approval_command.clone(),
|
||||
request.params.cwd.clone(),
|
||||
request.params.justification.clone(),
|
||||
)
|
||||
.await;
|
||||
|
||||
otel_event_manager.tool_decision(
|
||||
"local_shell",
|
||||
call_id,
|
||||
decision,
|
||||
ToolDecisionSource::User,
|
||||
);
|
||||
match decision {
|
||||
ReviewDecision::Approved => Ok(SandboxDecision::user_override(false)),
|
||||
ReviewDecision::ApprovedForSession => Ok(SandboxDecision::user_override(true)),
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
Err(ExecError::denied("exec command rejected by user"))
|
||||
}
|
||||
}
|
||||
}
|
||||
SafetyCheck::Reject { reason } => Err(ExecError::rejection(format!(
|
||||
"exec command rejected: {reason}"
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
fn select_apply_patch_sandbox(
|
||||
exec: &ApplyPatchExec,
|
||||
approval_policy: AskForApproval,
|
||||
config: &ExecutorConfig,
|
||||
) -> Result<SandboxDecision, ExecError> {
|
||||
if exec.user_explicitly_approved_this_action {
|
||||
return Ok(SandboxDecision::user_override(false));
|
||||
}
|
||||
|
||||
match assess_patch_safety(
|
||||
&exec.action,
|
||||
approval_policy,
|
||||
&config.sandbox_policy,
|
||||
&config.sandbox_cwd,
|
||||
) {
|
||||
SafetyCheck::AutoApprove { sandbox_type, .. } => Ok(SandboxDecision::auto(
|
||||
sandbox_type,
|
||||
should_escalate_on_failure(approval_policy, sandbox_type),
|
||||
)),
|
||||
SafetyCheck::AskUser => Err(ExecError::rejection(
|
||||
"patch requires approval but none was recorded",
|
||||
)),
|
||||
SafetyCheck::Reject { reason } => {
|
||||
Err(ExecError::rejection(format!("patch rejected: {reason}")))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::codex::make_session_and_context;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use codex_apply_patch::ApplyPatchAction;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_apply_patch_user_override_when_explicit() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let tmp = tempfile::tempdir().expect("tmp");
|
||||
let p = tmp.path().join("a.txt");
|
||||
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
|
||||
let exec = ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: true,
|
||||
};
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["apply_patch".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["apply_patch".into()],
|
||||
mode: ExecutionMode::ApplyPatch(exec),
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnRequest,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
// Explicit user override runs without sandbox
|
||||
assert_eq!(decision.initial_sandbox, SandboxType::None);
|
||||
assert_eq!(decision.escalate_on_failure, false);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_apply_patch_autoapprove_in_danger() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let tmp = tempfile::tempdir().expect("tmp");
|
||||
let p = tmp.path().join("a.txt");
|
||||
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
|
||||
let exec = ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: false,
|
||||
};
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["apply_patch".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["apply_patch".into()],
|
||||
mode: ExecutionMode::ApplyPatch(exec),
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnRequest,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
// On platforms with a sandbox, DangerFullAccess still prefers it
|
||||
let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None);
|
||||
assert_eq!(decision.initial_sandbox, expected);
|
||||
assert_eq!(decision.escalate_on_failure, false);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_apply_patch_requires_approval_on_unless_trusted() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let tempdir = tempfile::tempdir().expect("tmpdir");
|
||||
let p = tempdir.path().join("a.txt");
|
||||
let action = ApplyPatchAction::new_add_for_test(&p, "hello".to_string());
|
||||
let exec = ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action: false,
|
||||
};
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["apply_patch".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["apply_patch".into()],
|
||||
mode: ExecutionMode::ApplyPatch(exec),
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let result = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::UnlessTrusted,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await;
|
||||
match result {
|
||||
Ok(_) => panic!("expected error"),
|
||||
Err(ExecError::Function(FunctionCallError::RespondToModel(msg))) => {
|
||||
assert!(msg.contains("requires approval"))
|
||||
}
|
||||
Err(other) => panic!("unexpected error: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn select_shell_autoapprove_in_danger_mode() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
command: vec!["some-unknown".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["some-unknown".into()],
|
||||
mode: ExecutionMode::Shell,
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnRequest,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
assert_eq!(decision.initial_sandbox, SandboxType::None);
|
||||
assert_eq!(decision.escalate_on_failure, false);
|
||||
}
|
||||
|
||||
#[cfg(any(target_os = "macos", target_os = "linux"))]
|
||||
#[tokio::test]
|
||||
async fn select_shell_escalates_on_failure_with_platform_sandbox() {
|
||||
let (session, ctx) = make_session_and_context();
|
||||
let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None);
|
||||
let request = ExecutionRequest {
|
||||
params: ExecParams {
|
||||
// Unknown command => untrusted but not flagged dangerous
|
||||
command: vec!["some-unknown".into()],
|
||||
cwd: std::env::temp_dir(),
|
||||
timeout_ms: None,
|
||||
env: std::collections::HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
approval_command: vec!["some-unknown".into()],
|
||||
mode: ExecutionMode::Shell,
|
||||
stdout_stream: None,
|
||||
use_shell_profile: false,
|
||||
};
|
||||
let otel_event_manager = ctx.client.get_otel_event_manager();
|
||||
let decision = select_sandbox(
|
||||
&request,
|
||||
AskForApproval::OnFailure,
|
||||
Default::default(),
|
||||
&cfg,
|
||||
&session,
|
||||
"sub",
|
||||
"call",
|
||||
&otel_event_manager,
|
||||
)
|
||||
.await
|
||||
.expect("ok");
|
||||
// On macOS/Linux we should have a platform sandbox and escalate on failure
|
||||
assert_ne!(decision.initial_sandbox, SandboxType::None);
|
||||
assert_eq!(decision.escalate_on_failure, true);
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@ pub enum FunctionCallError {
|
||||
#[error("{0}")]
|
||||
RespondToModel(String),
|
||||
#[error("{0}")]
|
||||
#[allow(dead_code)] // TODO(jif) fix in a follow-up PR
|
||||
Denied(String),
|
||||
#[error("LocalShellCall without call_id or id")]
|
||||
MissingLocalShellCallId,
|
||||
|
||||
@@ -40,7 +40,7 @@ where
|
||||
}
|
||||
|
||||
/// Converts the sandbox policy into the CLI invocation for `codex-linux-sandbox`.
|
||||
fn create_linux_sandbox_command_args(
|
||||
pub(crate) fn create_linux_sandbox_command_args(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
sandbox_policy_cwd: &Path,
|
||||
@@ -56,7 +56,9 @@ fn create_linux_sandbox_command_args(
|
||||
serde_json::to_string(sandbox_policy).expect("Failed to serialize SandboxPolicy to JSON");
|
||||
|
||||
let mut linux_cmd: Vec<String> = vec![
|
||||
"--sandbox-policy-cwd".to_string(),
|
||||
sandbox_policy_cwd,
|
||||
"--sandbox-policy".to_string(),
|
||||
sandbox_policy_json,
|
||||
// Separator so that command arguments starting with `-` are not parsed as
|
||||
// options of the helper itself.
|
||||
|
||||
@@ -25,9 +25,7 @@ pub mod custom_prompts;
|
||||
mod environment_context;
|
||||
pub mod error;
|
||||
pub mod exec;
|
||||
mod exec_command;
|
||||
pub mod exec_env;
|
||||
pub mod executor;
|
||||
pub mod features;
|
||||
mod flags;
|
||||
pub mod git_info;
|
||||
@@ -38,6 +36,7 @@ mod mcp_tool_call;
|
||||
mod message_history;
|
||||
mod model_provider_info;
|
||||
pub mod parse_command;
|
||||
pub mod sandboxing;
|
||||
pub mod token_data;
|
||||
mod truncate;
|
||||
mod unified_exec;
|
||||
@@ -59,7 +58,6 @@ pub use auth::CodexAuth;
|
||||
pub mod default_client;
|
||||
pub mod model_family;
|
||||
mod openai_model_info;
|
||||
mod openai_tools;
|
||||
pub mod project_doc;
|
||||
mod rollout;
|
||||
pub(crate) mod safety;
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
pub use crate::tools::spec::*;
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::collections::HashSet;
|
||||
use std::path::Component;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
@@ -8,8 +7,6 @@ use codex_apply_patch::ApplyPatchFileChange;
|
||||
|
||||
use crate::exec::SandboxType;
|
||||
|
||||
use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
|
||||
use crate::command_safety::is_safe_command::is_known_safe_command;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
|
||||
@@ -48,30 +45,29 @@ pub fn assess_patch_safety(
|
||||
}
|
||||
}
|
||||
|
||||
// Even though the patch *appears* to be constrained to writable paths, it
|
||||
// is possible that paths in the patch are hard links to files outside the
|
||||
// writable roots, so we should still run `apply_patch` in a sandbox in that
|
||||
// case.
|
||||
// Even though the patch appears to be constrained to writable paths, it is
|
||||
// possible that paths in the patch are hard links to files outside the
|
||||
// writable roots, so we should still run `apply_patch` in a sandbox in that case.
|
||||
if is_write_patch_constrained_to_writable_paths(action, sandbox_policy, cwd)
|
||||
|| policy == AskForApproval::OnFailure
|
||||
{
|
||||
// Only auto‑approve when we can actually enforce a sandbox. Otherwise
|
||||
// fall back to asking the user because the patch may touch arbitrary
|
||||
// paths outside the project.
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
|
||||
// DangerFullAccess is intended to bypass sandboxing entirely.
|
||||
SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
None if sandbox_policy == &SandboxPolicy::DangerFullAccess => {
|
||||
// If the user has explicitly requested DangerFullAccess, then
|
||||
// we can auto-approve even without a sandbox.
|
||||
SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
user_explicitly_approved: false,
|
||||
}
|
||||
}
|
||||
None => SafetyCheck::AskUser,
|
||||
} else {
|
||||
// Only auto‑approve when we can actually enforce a sandbox. Otherwise
|
||||
// fall back to asking the user because the patch may touch arbitrary
|
||||
// paths outside the project.
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
None => SafetyCheck::AskUser,
|
||||
}
|
||||
}
|
||||
} else if policy == AskForApproval::Never {
|
||||
SafetyCheck::Reject {
|
||||
@@ -83,124 +79,6 @@ pub fn assess_patch_safety(
|
||||
}
|
||||
}
|
||||
|
||||
/// For a command to be run _without_ a sandbox, one of the following must be
|
||||
/// true:
|
||||
///
|
||||
/// - the user has explicitly approved the command
|
||||
/// - the command is on the "known safe" list
|
||||
/// - `DangerFullAccess` was specified and `UnlessTrusted` was not
|
||||
pub fn assess_command_safety(
|
||||
command: &[String],
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
approved: &HashSet<Vec<String>>,
|
||||
with_escalated_permissions: bool,
|
||||
) -> SafetyCheck {
|
||||
// Some commands look dangerous. Even when they would run inside a sandbox,
// ask the user unless they have explicitly approved the command, or reject
// outright if the approval_policy says never to ask.
|
||||
if command_might_be_dangerous(command) && !approved.contains(command) {
|
||||
if approval_policy == AskForApproval::Never {
|
||||
return SafetyCheck::Reject {
|
||||
reason: "dangerous command detected; rejected by user approval settings"
|
||||
.to_string(),
|
||||
};
|
||||
}
|
||||
|
||||
return SafetyCheck::AskUser;
|
||||
}
|
||||
|
||||
// A command is "trusted" because either:
|
||||
// - it belongs to a set of commands we consider "safe" by default, or
|
||||
// - the user has explicitly approved the command for this session
|
||||
//
|
||||
// Currently, whether a command is "trusted" is a simple boolean, but we
|
||||
// should include more metadata on this command test to indicate whether it
|
||||
// should be run inside a sandbox or not. (This could be something the user
|
||||
// defines as part of `execpolicy`.)
|
||||
//
|
||||
// For example, when `is_known_safe_command(command)` returns `true`, it
|
||||
// would probably be fine to run the command in a sandbox, but when
|
||||
// `approved.contains(command)` is `true`, the user may have approved it for
|
||||
// the session _because_ they know it needs to run outside a sandbox.
|
||||
|
||||
if is_known_safe_command(command) || approved.contains(command) {
|
||||
let user_explicitly_approved = approved.contains(command);
|
||||
return SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
user_explicitly_approved,
|
||||
};
|
||||
}
|
||||
|
||||
assess_safety_for_untrusted_command(approval_policy, sandbox_policy, with_escalated_permissions)
|
||||
}
|
||||
|
||||
pub(crate) fn assess_safety_for_untrusted_command(
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
with_escalated_permissions: bool,
|
||||
) -> SafetyCheck {
|
||||
use AskForApproval::*;
|
||||
use SandboxPolicy::*;
|
||||
|
||||
match (approval_policy, sandbox_policy) {
|
||||
(UnlessTrusted, _) => {
|
||||
// Even though the user may have opted into DangerFullAccess,
|
||||
// they also requested that we ask for approval for untrusted
|
||||
// commands.
|
||||
SafetyCheck::AskUser
|
||||
}
|
||||
(OnFailure, DangerFullAccess)
|
||||
| (Never, DangerFullAccess)
|
||||
| (OnRequest, DangerFullAccess) => SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
(OnRequest, ReadOnly) | (OnRequest, WorkspaceWrite { .. }) => {
|
||||
if with_escalated_permissions {
|
||||
SafetyCheck::AskUser
|
||||
} else {
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
// Fall back to asking since the command is untrusted and
|
||||
// we do not have a sandbox available
|
||||
None => SafetyCheck::AskUser,
|
||||
}
|
||||
}
|
||||
}
|
||||
(Never, ReadOnly)
|
||||
| (Never, WorkspaceWrite { .. })
|
||||
| (OnFailure, ReadOnly)
|
||||
| (OnFailure, WorkspaceWrite { .. }) => {
|
||||
match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
None => {
|
||||
if matches!(approval_policy, OnFailure) {
|
||||
// Since the command is not trusted, even though the
|
||||
// user has requested to only ask for approval on
|
||||
// failure, we will ask the user because no sandbox is
|
||||
// available.
|
||||
SafetyCheck::AskUser
|
||||
} else {
|
||||
// We are in non-interactive mode and lack approval, so
|
||||
// all we can do is reject the command.
|
||||
SafetyCheck::Reject {
|
||||
reason: "auto-rejected because command is not on trusted list"
|
||||
.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_platform_sandbox() -> Option<SandboxType> {
|
||||
if cfg!(target_os = "macos") {
|
||||
Some(SandboxType::MacosSeatbelt)
|
||||
@@ -339,101 +217,4 @@ mod tests {
|
||||
&cwd,
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_request_escalated_privileges() {
|
||||
// Should not be a trusted command
|
||||
let command = vec!["git commit".to_string()];
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
let approved: HashSet<Vec<String>> = HashSet::new();
|
||||
let request_escalated_privileges = true;
|
||||
|
||||
let safety_check = assess_command_safety(
|
||||
&command,
|
||||
approval_policy,
|
||||
&sandbox_policy,
|
||||
&approved,
|
||||
request_escalated_privileges,
|
||||
);
|
||||
|
||||
assert_eq!(safety_check, SafetyCheck::AskUser);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dangerous_command_allowed_if_explicitly_approved() {
|
||||
let command = vec!["git".to_string(), "reset".to_string(), "--hard".to_string()];
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
let mut approved: HashSet<Vec<String>> = HashSet::new();
|
||||
approved.insert(command.clone());
|
||||
let request_escalated_privileges = false;
|
||||
|
||||
let safety_check = assess_command_safety(
|
||||
&command,
|
||||
approval_policy,
|
||||
&sandbox_policy,
|
||||
&approved,
|
||||
request_escalated_privileges,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
safety_check,
|
||||
SafetyCheck::AutoApprove {
|
||||
sandbox_type: SandboxType::None,
|
||||
user_explicitly_approved: true,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dangerous_command_not_allowed_if_not_explicitly_approved() {
|
||||
let command = vec!["git".to_string(), "reset".to_string(), "--hard".to_string()];
|
||||
let approval_policy = AskForApproval::Never;
|
||||
let sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
let approved: HashSet<Vec<String>> = HashSet::new();
|
||||
let request_escalated_privileges = false;
|
||||
|
||||
let safety_check = assess_command_safety(
|
||||
&command,
|
||||
approval_policy,
|
||||
&sandbox_policy,
|
||||
&approved,
|
||||
request_escalated_privileges,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
safety_check,
|
||||
SafetyCheck::Reject {
|
||||
reason: "dangerous command detected; rejected by user approval settings"
|
||||
.to_string(),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_request_escalated_privileges_no_sandbox_fallback() {
|
||||
let command = vec!["git".to_string(), "commit".to_string()];
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::ReadOnly;
|
||||
let approved: HashSet<Vec<String>> = HashSet::new();
|
||||
let request_escalated_privileges = false;
|
||||
|
||||
let safety_check = assess_command_safety(
|
||||
&command,
|
||||
approval_policy,
|
||||
&sandbox_policy,
|
||||
&approved,
|
||||
request_escalated_privileges,
|
||||
);
|
||||
|
||||
let expected = match get_platform_sandbox() {
|
||||
Some(sandbox_type) => SafetyCheck::AutoApprove {
|
||||
sandbox_type,
|
||||
user_explicitly_approved: false,
|
||||
},
|
||||
None => SafetyCheck::AskUser,
|
||||
};
|
||||
assert_eq!(safety_check, expected);
|
||||
}
|
||||
}
|
||||
|
||||
codex-rs/core/src/sandboxing/mod.rs (new file, 170 lines)
@@ -0,0 +1,170 @@
|
||||
/*
|
||||
Module: sandboxing
|
||||
|
||||
Build platform wrappers and produce ExecEnv for execution. Owns low‑level
|
||||
sandbox placement and transformation of portable CommandSpec into a
|
||||
ready‑to‑spawn environment.
|
||||
*/
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::SandboxType;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::exec::execute_exec_env;
|
||||
use crate::landlock::create_linux_sandbox_command_args;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE;
|
||||
use crate::seatbelt::create_seatbelt_command_args;
|
||||
use crate::spawn::CODEX_SANDBOX_ENV_VAR;
|
||||
use crate::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CommandSpec {
|
||||
pub program: String,
|
||||
pub args: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub env: HashMap<String, String>,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ExecEnv {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub env: HashMap<String, String>,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub sandbox: SandboxType,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
pub arg0: Option<String>,
|
||||
}
|
||||
|
||||
pub enum SandboxPreference {
|
||||
Auto,
|
||||
Require,
|
||||
Forbid,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub(crate) enum SandboxTransformError {
|
||||
#[error("missing codex-linux-sandbox executable path")]
|
||||
MissingLinuxSandboxExecutable,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SandboxManager;
|
||||
|
||||
impl SandboxManager {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
pub(crate) fn select_initial(
|
||||
&self,
|
||||
policy: &SandboxPolicy,
|
||||
pref: SandboxablePreference,
|
||||
) -> SandboxType {
|
||||
match pref {
|
||||
SandboxablePreference::Forbid => SandboxType::None,
|
||||
SandboxablePreference::Require => {
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
return SandboxType::MacosSeatbelt;
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
return SandboxType::LinuxSeccomp;
|
||||
}
|
||||
#[allow(unreachable_code)]
|
||||
SandboxType::None
|
||||
}
|
||||
SandboxablePreference::Auto => match policy {
|
||||
SandboxPolicy::DangerFullAccess => SandboxType::None,
|
||||
#[cfg(target_os = "macos")]
|
||||
_ => SandboxType::MacosSeatbelt,
|
||||
#[cfg(target_os = "linux")]
|
||||
_ => SandboxType::LinuxSeccomp,
|
||||
#[cfg(not(any(target_os = "macos", target_os = "linux")))]
|
||||
_ => SandboxType::None,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn transform(
|
||||
&self,
|
||||
spec: &CommandSpec,
|
||||
policy: &SandboxPolicy,
|
||||
sandbox: SandboxType,
|
||||
sandbox_policy_cwd: &Path,
|
||||
codex_linux_sandbox_exe: Option<&PathBuf>,
|
||||
) -> Result<ExecEnv, SandboxTransformError> {
|
||||
let mut env = spec.env.clone();
|
||||
if !policy.has_full_network_access() {
|
||||
env.insert(
|
||||
CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR.to_string(),
|
||||
"1".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let mut command = Vec::with_capacity(1 + spec.args.len());
|
||||
command.push(spec.program.clone());
|
||||
command.extend(spec.args.iter().cloned());
|
||||
|
||||
let (command, sandbox_env, arg0_override) = match sandbox {
|
||||
SandboxType::None => (command, HashMap::new(), None),
|
||||
SandboxType::MacosSeatbelt => {
|
||||
let mut seatbelt_env = HashMap::new();
|
||||
seatbelt_env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string());
|
||||
let mut args =
|
||||
create_seatbelt_command_args(command.clone(), policy, sandbox_policy_cwd);
|
||||
let mut full_command = Vec::with_capacity(1 + args.len());
|
||||
full_command.push(MACOS_PATH_TO_SEATBELT_EXECUTABLE.to_string());
|
||||
full_command.append(&mut args);
|
||||
(full_command, seatbelt_env, None)
|
||||
}
|
||||
SandboxType::LinuxSeccomp => {
|
||||
let exe = codex_linux_sandbox_exe
|
||||
.ok_or(SandboxTransformError::MissingLinuxSandboxExecutable)?;
|
||||
let mut args =
|
||||
create_linux_sandbox_command_args(command.clone(), policy, sandbox_policy_cwd);
|
||||
let mut full_command = Vec::with_capacity(1 + args.len());
|
||||
full_command.push(exe.to_string_lossy().to_string());
|
||||
full_command.append(&mut args);
|
||||
(
|
||||
full_command,
|
||||
HashMap::new(),
|
||||
Some("codex-linux-sandbox".to_string()),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
env.extend(sandbox_env);
|
||||
|
||||
Ok(ExecEnv {
|
||||
command,
|
||||
cwd: spec.cwd.clone(),
|
||||
env,
|
||||
timeout_ms: spec.timeout_ms,
|
||||
sandbox,
|
||||
with_escalated_permissions: spec.with_escalated_permissions,
|
||||
justification: spec.justification.clone(),
|
||||
arg0: arg0_override,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn denied(&self, sandbox: SandboxType, out: &ExecToolCallOutput) -> bool {
|
||||
crate::exec::is_likely_sandbox_denied(sandbox, out)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn execute_env(
|
||||
env: &ExecEnv,
|
||||
policy: &SandboxPolicy,
|
||||
stdout_stream: Option<StdoutStream>,
|
||||
) -> crate::error::Result<ExecToolCallOutput> {
|
||||
execute_exec_env(env.clone(), policy, stdout_stream).await
|
||||
}
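The `transform` step above turns a portable `CommandSpec` into a ready-to-spawn `ExecEnv` by prepending a platform sandbox wrapper and layering marker environment variables. Below is a standalone sketch of that wrapping with a hypothetical `wrap_in_sandbox` helper; the policy text and the `SANDBOX` variable name are illustrative placeholders, not the real arguments:

use std::collections::HashMap;

#[derive(Clone, Copy)]
enum Sandbox {
    None,
    Seatbelt,
}

struct ExecEnv {
    command: Vec<String>,
    env: HashMap<String, String>,
}

// Wrap a portable command for the chosen sandbox. The inline policy and the
// "SANDBOX" marker variable are illustrative, not the crate's real values.
fn wrap_in_sandbox(mut command: Vec<String>, sandbox: Sandbox) -> ExecEnv {
    let mut env = HashMap::new();
    match sandbox {
        Sandbox::None => {}
        Sandbox::Seatbelt => {
            env.insert("SANDBOX".to_string(), "seatbelt".to_string());
            let mut wrapped = vec![
                "/usr/bin/sandbox-exec".to_string(),
                "-p".to_string(),
                "(version 1)".to_string(), // placeholder policy text
                "--".to_string(),
            ];
            wrapped.append(&mut command);
            command = wrapped;
        }
    }
    ExecEnv { command, env }
}

fn main() {
    let env = wrap_in_sandbox(vec!["ls".to_string(), "-la".to_string()], Sandbox::Seatbelt);
    println!("{:?} with env {:?}", env.command, env.env);
}

The real `transform` also carries `cwd`, `timeout_ms`, and the escalation fields through unchanged, and returns an error when the Linux sandbox helper path is missing.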
|
||||
@@ -14,7 +14,7 @@ const MACOS_SEATBELT_BASE_POLICY: &str = include_str!("seatbelt_base_policy.sbpl
|
||||
/// to defend against an attacker trying to inject a malicious version on the
|
||||
/// PATH. If /usr/bin/sandbox-exec has been tampered with, then the attacker
|
||||
/// already has root access.
|
||||
const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec";
|
||||
pub(crate) const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec";
|
||||
|
||||
pub async fn spawn_command_under_seatbelt(
|
||||
command: Vec<String>,
|
||||
@@ -39,7 +39,7 @@ pub async fn spawn_command_under_seatbelt(
|
||||
.await
|
||||
}
|
||||
|
||||
fn create_seatbelt_command_args(
|
||||
pub(crate) fn create_seatbelt_command_args(
|
||||
command: Vec<String>,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
sandbox_policy_cwd: &Path,
|
||||
|
||||
@@ -347,6 +347,7 @@ mod tests {
|
||||
)]),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
},
|
||||
SandboxType::None,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
@@ -455,6 +456,7 @@ mod macos_tests {
|
||||
)]),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
},
|
||||
SandboxType::None,
|
||||
&SandboxPolicy::DangerFullAccess,
|
||||
|
||||
@@ -2,9 +2,8 @@ use std::sync::Arc;
|
||||
|
||||
use crate::AuthManager;
|
||||
use crate::RolloutRecorder;
|
||||
use crate::exec_command::ExecSessionManager;
|
||||
use crate::executor::Executor;
|
||||
use crate::mcp_connection_manager::McpConnectionManager;
|
||||
use crate::tools::sandboxing::ApprovalStore;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use crate::user_notification::UserNotifier;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
@@ -12,13 +11,12 @@ use tokio::sync::Mutex;
|
||||
|
||||
pub(crate) struct SessionServices {
|
||||
pub(crate) mcp_connection_manager: McpConnectionManager,
|
||||
pub(crate) session_manager: ExecSessionManager,
|
||||
pub(crate) unified_exec_manager: UnifiedExecSessionManager,
|
||||
pub(crate) notifier: UserNotifier,
|
||||
pub(crate) rollout: Mutex<Option<RolloutRecorder>>,
|
||||
pub(crate) user_shell: crate::shell::Shell,
|
||||
pub(crate) show_raw_agent_reasoning: bool,
|
||||
pub(crate) executor: Executor,
|
||||
pub(crate) auth_manager: Arc<AuthManager>,
|
||||
pub(crate) otel_event_manager: OtelEventManager,
|
||||
pub(crate) tool_approvals: Mutex<ApprovalStore>,
|
||||
}
|
||||
|
||||
@@ -232,6 +232,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) struct ExecCommandContext {
|
||||
pub(crate) sub_id: String,
|
||||
pub(crate) call_id: String,
|
||||
@@ -243,6 +244,7 @@ pub(crate) struct ExecCommandContext {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
#[allow(dead_code)]
|
||||
pub(crate) struct ApplyPatchCommandContext {
|
||||
pub(crate) user_explicitly_approved_this_action: bool,
|
||||
pub(crate) changes: HashMap<PathBuf, FileChange>,
|
||||
|
||||
codex-rs/core/src/tools/events.rs (new file, 235 lines)
@@ -0,0 +1,235 @@
|
||||
use crate::codex::Session;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::parse_command::parse_command;
|
||||
use crate::protocol::Event;
|
||||
use crate::protocol::EventMsg;
|
||||
use crate::protocol::ExecCommandBeginEvent;
|
||||
use crate::protocol::ExecCommandEndEvent;
|
||||
use crate::protocol::FileChange;
|
||||
use crate::protocol::PatchApplyBeginEvent;
|
||||
use crate::protocol::PatchApplyEndEvent;
|
||||
use crate::protocol::TurnDiffEvent;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use super::format_exec_output;
|
||||
use super::format_exec_output_str;
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct ToolEventCtx<'a> {
|
||||
pub session: &'a Session,
|
||||
pub sub_id: &'a str,
|
||||
pub call_id: &'a str,
|
||||
pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
|
||||
}
|
||||
|
||||
impl<'a> ToolEventCtx<'a> {
|
||||
pub fn new(
|
||||
session: &'a Session,
|
||||
sub_id: &'a str,
|
||||
call_id: &'a str,
|
||||
turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
|
||||
) -> Self {
|
||||
Self {
|
||||
session,
|
||||
sub_id,
|
||||
call_id,
|
||||
turn_diff_tracker,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) enum ToolEventStage {
|
||||
Begin,
|
||||
Success(ExecToolCallOutput),
|
||||
Failure(ToolEventFailure),
|
||||
}
|
||||
|
||||
pub(crate) enum ToolEventFailure {
|
||||
Output(ExecToolCallOutput),
|
||||
Message(String),
|
||||
}
|
||||
// Concrete, allocation-free emitter: avoids trait objects and boxed futures.
|
||||
pub(crate) enum ToolEmitter {
|
||||
Shell {
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
},
|
||||
ApplyPatch {
|
||||
changes: HashMap<PathBuf, FileChange>,
|
||||
auto_approved: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl ToolEmitter {
|
||||
pub fn shell(command: Vec<String>, cwd: PathBuf) -> Self {
|
||||
Self::Shell { command, cwd }
|
||||
}
|
||||
|
||||
pub fn apply_patch(changes: HashMap<PathBuf, FileChange>, auto_approved: bool) -> Self {
|
||||
Self::ApplyPatch {
|
||||
changes,
|
||||
auto_approved,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn emit(&self, ctx: ToolEventCtx<'_>, stage: ToolEventStage) {
|
||||
match (self, stage) {
|
||||
(Self::Shell { command, cwd }, ToolEventStage::Begin) => {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
command: command.clone(),
|
||||
cwd: cwd.clone(),
|
||||
parsed_cmd: parse_command(command),
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Output(output))) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.aggregated_output.text.clone(),
|
||||
output.exit_code,
|
||||
output.duration,
|
||||
format_exec_output_str(&output),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Message(message))) => {
|
||||
emit_exec_end(
|
||||
ctx,
|
||||
String::new(),
|
||||
(*message).to_string(),
|
||||
(*message).to_string(),
|
||||
-1,
|
||||
Duration::ZERO,
|
||||
format_exec_output(&message),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
(
|
||||
Self::ApplyPatch {
|
||||
changes,
|
||||
auto_approved,
|
||||
},
|
||||
ToolEventStage::Begin,
|
||||
) => {
|
||||
if let Some(tracker) = ctx.turn_diff_tracker {
|
||||
let mut guard = tracker.lock().await;
|
||||
guard.on_patch_begin(changes);
|
||||
}
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
auto_approved: *auto_approved,
|
||||
changes: changes.clone(),
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
(Self::ApplyPatch { .. }, ToolEventStage::Success(output)) => {
|
||||
emit_patch_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.exit_code == 0,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::ApplyPatch { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Output(output)),
|
||||
) => {
|
||||
emit_patch_end(
|
||||
ctx,
|
||||
output.stdout.text.clone(),
|
||||
output.stderr.text.clone(),
|
||||
output.exit_code == 0,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
(
|
||||
Self::ApplyPatch { .. },
|
||||
ToolEventStage::Failure(ToolEventFailure::Message(message)),
|
||||
) => {
|
||||
emit_patch_end(ctx, String::new(), (*message).to_string(), false).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn emit_exec_end(
|
||||
ctx: ToolEventCtx<'_>,
|
||||
stdout: String,
|
||||
stderr: String,
|
||||
aggregated_output: String,
|
||||
exit_code: i32,
|
||||
duration: Duration,
|
||||
formatted_output: String,
|
||||
) {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::ExecCommandEnd(ExecCommandEndEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
aggregated_output,
|
||||
exit_code,
|
||||
duration,
|
||||
formatted_output,
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
async fn emit_patch_end(ctx: ToolEventCtx<'_>, stdout: String, stderr: String, success: bool) {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::PatchApplyEnd(PatchApplyEndEvent {
|
||||
call_id: ctx.call_id.to_string(),
|
||||
stdout,
|
||||
stderr,
|
||||
success,
|
||||
}),
|
||||
})
|
||||
.await;
|
||||
|
||||
if let Some(tracker) = ctx.turn_diff_tracker {
|
||||
let unified_diff = {
|
||||
let mut guard = tracker.lock().await;
|
||||
guard.get_unified_diff()
|
||||
};
|
||||
if let Ok(Some(unified_diff)) = unified_diff {
|
||||
ctx.session
|
||||
.send_event(Event {
|
||||
id: ctx.sub_id.to_string(),
|
||||
msg: EventMsg::TurnDiff(TurnDiffEvent { unified_diff }),
|
||||
})
|
||||
.await;
|
||||
}
|
||||
}
|
||||
}
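Note: the emitter above pairs one Begin event with exactly one Success/Failure end event per tool call. The following is an illustrative, self-contained Rust sketch of that lifecycle; the types are simplified stand-ins, not the crate's Session/Event/ExecToolCallOutput types.

// Simplified stand-in for the Begin -> Success/Failure pairing enforced by ToolEmitter.
use std::time::Duration;

#[derive(Debug)]
enum Stage {
    Begin,
    Success { exit_code: i32, duration: Duration },
    Failure { message: String },
}

fn emit(call_id: &str, stage: Stage) {
    // In the real code this becomes an ExecCommandBegin/End or PatchApplyBegin/End
    // protocol event sent over the session's event channel.
    println!("[{call_id}] {stage:?}");
}

fn main() {
    let call_id = "call-1";
    emit(call_id, Stage::Begin);
    // ... run the tool ...
    let ok = true; // pretend the command succeeded
    if ok {
        emit(call_id, Stage::Success { exit_code: 0, duration: Duration::from_millis(12) });
    } else {
        emit(call_id, Stage::Failure { message: "sandbox denied".into() });
    }
}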
|
||||
@@ -8,7 +8,6 @@ use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::openai_tools::JsonSchema;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
@@ -16,6 +15,7 @@ use crate::tools::handle_container_exec_with_params;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use crate::tools::spec::ApplyPatchToolArgs;
|
||||
use crate::tools::spec::JsonSchema;
|
||||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
@@ -72,6 +72,7 @@ impl ToolHandler for ApplyPatchHandler {
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
arg0: None,
|
||||
};
|
||||
|
||||
let content = handle_container_exec_with_params(
|
||||
|
||||
@@ -1,68 +0,0 @@
|
||||
use async_trait::async_trait;
|
||||
|
||||
use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
|
||||
use crate::exec_command::ExecCommandParams;
|
||||
use crate::exec_command::WRITE_STDIN_TOOL_NAME;
|
||||
use crate::exec_command::WriteStdinParams;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
|
||||
pub struct ExecStreamHandler;
|
||||
|
||||
#[async_trait]
|
||||
impl ToolHandler for ExecStreamHandler {
|
||||
fn kind(&self) -> ToolKind {
|
||||
ToolKind::Function
|
||||
}
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session,
|
||||
tool_name,
|
||||
payload,
|
||||
..
|
||||
} = invocation;
|
||||
|
||||
let arguments = match payload {
|
||||
ToolPayload::Function { arguments } => arguments,
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"exec_stream handler received unsupported payload".to_string(),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let content = match tool_name.as_str() {
|
||||
EXEC_COMMAND_TOOL_NAME => {
|
||||
let params: ExecCommandParams = serde_json::from_str(&arguments).map_err(|e| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to parse function arguments: {e:?}"
|
||||
))
|
||||
})?;
|
||||
session.handle_exec_command_tool(params).await?
|
||||
}
|
||||
WRITE_STDIN_TOOL_NAME => {
|
||||
let params: WriteStdinParams = serde_json::from_str(&arguments).map_err(|e| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"failed to parse function arguments: {e:?}"
|
||||
))
|
||||
})?;
|
||||
session.handle_write_stdin_tool(params).await?
|
||||
}
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"exec_stream handler does not support tool {tool_name}"
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
Ok(ToolOutput::Function {
|
||||
content,
|
||||
success: Some(true),
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
pub mod apply_patch;
|
||||
mod exec_stream;
|
||||
mod grep_files;
|
||||
mod list_dir;
|
||||
mod mcp;
|
||||
@@ -14,7 +13,6 @@ mod view_image;
|
||||
pub use plan::PLAN_TOOL;
|
||||
|
||||
pub use apply_patch::ApplyPatchHandler;
|
||||
pub use exec_stream::ExecStreamHandler;
|
||||
pub use grep_files::GrepFilesHandler;
|
||||
pub use list_dir::ListDirHandler;
|
||||
pub use mcp::McpHandler;
|
||||
|
||||
@@ -2,12 +2,12 @@ use crate::client_common::tools::ResponsesApiTool;
|
||||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::codex::Session;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::openai_tools::JsonSchema;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
use crate::tools::spec::JsonSchema;
|
||||
use async_trait::async_trait;
|
||||
use codex_protocol::plan_tool::UpdatePlanArgs;
|
||||
use codex_protocol::protocol::Event;
|
||||
|
||||
@@ -24,6 +24,7 @@ impl ShellHandler {
|
||||
env: create_env(&turn_context.shell_environment_policy),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification,
|
||||
arg0: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,13 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
|
||||
async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
|
||||
let ToolInvocation {
|
||||
session, payload, ..
|
||||
session,
|
||||
turn,
|
||||
sub_id,
|
||||
call_id,
|
||||
tool_name: _tool_name,
|
||||
payload,
|
||||
..
|
||||
} = invocation;
|
||||
|
||||
let args = match payload {
|
||||
@@ -73,13 +79,23 @@ impl ToolHandler for UnifiedExecHandler {
|
||||
};
|
||||
|
||||
let request = UnifiedExecRequest {
|
||||
session_id: parsed_session_id,
|
||||
input_chunks: &input,
|
||||
timeout_ms,
|
||||
};
|
||||
|
||||
let value = session
|
||||
.run_unified_exec_request(request)
|
||||
.services
|
||||
.unified_exec_manager
|
||||
.handle_request(
|
||||
request,
|
||||
crate::unified_exec::UnifiedExecContext {
|
||||
session: &session,
|
||||
turn: turn.as_ref(),
|
||||
sub_id: &sub_id,
|
||||
call_id: &call_id,
|
||||
session_id: parsed_session_id,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!("unified exec failed: {err:?}"))
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
pub mod context;
|
||||
pub mod events;
|
||||
pub(crate) mod handlers;
|
||||
pub mod orchestrator;
|
||||
pub mod parallel;
|
||||
pub mod registry;
|
||||
pub mod router;
|
||||
pub mod runtimes;
|
||||
pub mod sandboxing;
|
||||
pub mod spec;
|
||||
|
||||
use crate::apply_patch;
|
||||
use crate::apply_patch::ApplyPatchExec;
|
||||
use crate::apply_patch::InternalApplyPatchInvocation;
|
||||
use crate::apply_patch::convert_apply_patch_to_protocol;
|
||||
use crate::codex::Session;
|
||||
@@ -15,14 +18,19 @@ use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::ExecParams;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::StdoutStream;
|
||||
use crate::executor::ExecutionMode;
|
||||
use crate::executor::errors::ExecError;
|
||||
use crate::executor::linkers::PreparedExec;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ApplyPatchCommandContext;
|
||||
use crate::tools::context::ExecCommandContext;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::events::ToolEventFailure;
|
||||
use crate::tools::events::ToolEventStage;
|
||||
use crate::tools::orchestrator::ToolOrchestrator;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
|
||||
use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
|
||||
use crate::tools::runtimes::shell::ShellRequest;
|
||||
use crate::tools::runtimes::shell::ShellRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use codex_apply_patch::MaybeApplyPatchVerified;
|
||||
use codex_apply_patch::maybe_parse_apply_patch_verified;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
@@ -56,7 +64,7 @@ pub(crate) async fn handle_container_exec_with_params(
|
||||
sub_id: String,
|
||||
call_id: String,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let otel_event_manager = turn_context.client.get_otel_event_manager();
|
||||
let _otel_event_manager = turn_context.client.get_otel_event_manager();
|
||||
|
||||
if params.with_escalated_permissions.unwrap_or(false)
|
||||
&& !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
|
||||
@@ -100,86 +108,142 @@ pub(crate) async fn handle_container_exec_with_params(
|
||||
MaybeApplyPatchVerified::NotApplyPatch => None,
|
||||
};
|
||||
|
||||
let command_for_display = if let Some(exec) = apply_patch_exec.as_ref() {
|
||||
vec!["apply_patch".to_string(), exec.action.patch.clone()]
|
||||
} else {
|
||||
params.command.clone()
|
||||
};
|
||||
|
||||
let exec_command_context = ExecCommandContext {
|
||||
sub_id: sub_id.clone(),
|
||||
call_id: call_id.clone(),
|
||||
command_for_display: command_for_display.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
apply_patch: apply_patch_exec.as_ref().map(
|
||||
|ApplyPatchExec {
|
||||
action,
|
||||
user_explicitly_approved_this_action,
|
||||
}| ApplyPatchCommandContext {
|
||||
user_explicitly_approved_this_action: *user_explicitly_approved_this_action,
|
||||
changes: convert_apply_patch_to_protocol(action),
|
||||
},
|
||||
let (event_emitter, diff_opt) = match apply_patch_exec.as_ref() {
|
||||
Some(exec) => (
|
||||
ToolEmitter::apply_patch(
|
||||
convert_apply_patch_to_protocol(&exec.action),
|
||||
!exec.user_explicitly_approved_this_action,
|
||||
),
|
||||
Some(&turn_diff_tracker),
|
||||
),
|
||||
None => (
|
||||
ToolEmitter::shell(params.command.clone(), params.cwd.clone()),
|
||||
None,
|
||||
),
|
||||
tool_name: tool_name.to_string(),
|
||||
otel_event_manager,
|
||||
};
|
||||
|
||||
let mode = match apply_patch_exec {
|
||||
Some(exec) => ExecutionMode::ApplyPatch(exec),
|
||||
None => ExecutionMode::Shell,
|
||||
};
|
||||
let event_ctx = ToolEventCtx::new(sess.as_ref(), &sub_id, &call_id, diff_opt);
|
||||
event_emitter.emit(event_ctx, ToolEventStage::Begin).await;
|
||||
|
||||
sess.services.executor.update_environment(
|
||||
turn_context.sandbox_policy.clone(),
|
||||
turn_context.cwd.clone(),
|
||||
);
|
||||
// Build runtime contexts only when needed (shell/apply_patch below).
|
||||
|
||||
let prepared_exec = PreparedExec::new(
|
||||
exec_command_context,
|
||||
params,
|
||||
command_for_display,
|
||||
mode,
|
||||
Some(StdoutStream {
|
||||
if let Some(exec) = apply_patch_exec {
|
||||
// Route apply_patch execution through the new orchestrator/runtime.
|
||||
let req = ApplyPatchRequest {
|
||||
patch: exec.action.patch.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
user_explicitly_approved: exec.user_explicitly_approved_this_action,
|
||||
codex_exe: turn_context.codex_linux_sandbox_exe.clone(),
|
||||
};
|
||||
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ApplyPatchRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: sess.as_ref(),
|
||||
sub_id: sub_id.clone(),
|
||||
call_id: call_id.clone(),
|
||||
tx_event: sess.get_tx_event(),
|
||||
}),
|
||||
turn_context.shell_environment_policy.use_profile,
|
||||
);
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
|
||||
let output_result = sess
|
||||
.run_exec_with_events(
|
||||
turn_diff_tracker.clone(),
|
||||
prepared_exec,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&tool_ctx,
|
||||
&turn_context,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
|
||||
// always make sure to truncate the output if its length isn't controlled.
|
||||
match output_result {
|
||||
handle_exec_outcome(&event_emitter, event_ctx, out).await
|
||||
} else {
|
||||
// Route shell execution through the new orchestrator/runtime.
|
||||
let req = ShellRequest {
|
||||
command: params.command.clone(),
|
||||
cwd: params.cwd.clone(),
|
||||
timeout_ms: params.timeout_ms,
|
||||
env: params.env.clone(),
|
||||
with_escalated_permissions: params.with_escalated_permissions,
|
||||
justification: params.justification.clone(),
|
||||
};
|
||||
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
let mut runtime = ShellRuntime::new();
|
||||
let tool_ctx = ToolCtx {
|
||||
session: sess.as_ref(),
|
||||
sub_id: sub_id.clone(),
|
||||
call_id: call_id.clone(),
|
||||
tool_name: tool_name.to_string(),
|
||||
};
|
||||
|
||||
let out = orchestrator
|
||||
.run(
|
||||
&mut runtime,
|
||||
&req,
|
||||
&tool_ctx,
|
||||
&turn_context,
|
||||
turn_context.approval_policy,
|
||||
)
|
||||
.await;
|
||||
|
||||
handle_exec_outcome(&event_emitter, event_ctx, out).await
|
||||
}
|
||||
}
|
||||
|
||||
async fn handle_exec_outcome(
|
||||
event_emitter: &ToolEmitter,
|
||||
event_ctx: ToolEventCtx<'_>,
|
||||
out: Result<ExecToolCallOutput, ToolError>,
|
||||
) -> Result<String, FunctionCallError> {
|
||||
let event;
|
||||
let result = match out {
|
||||
Ok(output) => {
|
||||
let ExecToolCallOutput { exit_code, .. } = &output;
|
||||
let content = format_exec_output_apply_patch(&output);
|
||||
if *exit_code == 0 {
|
||||
let content = format_exec_output_for_model(&output);
|
||||
let exit_code = output.exit_code;
|
||||
event = ToolEventStage::Success(output);
|
||||
if exit_code == 0 {
|
||||
Ok(content)
|
||||
} else {
|
||||
Err(FunctionCallError::RespondToModel(content))
|
||||
}
|
||||
}
|
||||
Err(ExecError::Function(err)) => Err(truncate_function_error(err)),
|
||||
Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err(
|
||||
FunctionCallError::RespondToModel(format_exec_output_apply_patch(&output)),
|
||||
),
|
||||
Err(ExecError::Codex(err)) => {
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
|
||||
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
let response = format_exec_output_for_model(&output);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
|
||||
Err(FunctionCallError::RespondToModel(response))
|
||||
}
|
||||
Err(ToolError::Codex(err)) => {
|
||||
let message = format!("execution error: {err:?}");
|
||||
let response = format_exec_output(&message);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(message));
|
||||
Err(FunctionCallError::RespondToModel(format_exec_output(
|
||||
&message,
|
||||
&response,
|
||||
)))
|
||||
}
|
||||
}
|
||||
Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
|
||||
// Normalize common rejection messages for exec tools so tests and
|
||||
// users see a clear, consistent phrase.
|
||||
let normalized = if msg == "rejected by user" {
|
||||
"exec command rejected by user".to_string()
|
||||
} else {
|
||||
msg
|
||||
};
|
||||
let response = format_exec_output(&normalized);
|
||||
event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
|
||||
Err(FunctionCallError::RespondToModel(format_exec_output(
|
||||
&response,
|
||||
)))
|
||||
}
|
||||
};
|
||||
event_emitter.emit(event_ctx, event).await;
|
||||
result
|
||||
}
|
||||
|
||||
pub fn format_exec_output_apply_patch(exec_output: &ExecToolCallOutput) -> String {
|
||||
/// Format the combined exec output for sending back to the model.
|
||||
/// Includes exit code and duration metadata; truncates large bodies safely.
|
||||
pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
|
||||
let ExecToolCallOutput {
|
||||
exit_code,
|
||||
duration,
|
||||
@@ -233,18 +297,7 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
||||
format_exec_output(content)
|
||||
}
|
||||
|
||||
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
|
||||
match err {
|
||||
FunctionCallError::RespondToModel(msg) => {
|
||||
FunctionCallError::RespondToModel(format_exec_output(&msg))
|
||||
}
|
||||
FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
|
||||
FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
fn format_exec_output(content: &str) -> String {
|
||||
pub(super) fn format_exec_output(content: &str) -> String {
|
||||
// Head+tail truncation for the model: show the beginning and end with an elision.
|
||||
// Clients still receive full streams; only this formatted summary is capped.
|
||||
let total_lines = content.lines().count();
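Note: the hunk above is cut off before the truncation body. As a hedged sketch only, head+tail truncation of exec output looks roughly like the following; MAX_LINES and the elision message are illustrative placeholders, not the crate's real constants.

// Keep the first and last N/2 lines and note how many were elided in between.
const MAX_LINES: usize = 64;

fn truncate_head_tail(content: &str) -> String {
    let lines: Vec<&str> = content.lines().collect();
    if lines.len() <= MAX_LINES {
        return content.to_string();
    }
    let keep = MAX_LINES / 2;
    let omitted = lines.len() - 2 * keep;
    let mut out = lines[..keep].join("\n");
    out.push_str(&format!("\n[... omitted {omitted} of {} lines ...]\n", lines.len()));
    out.push_str(&lines[lines.len() - keep..].join("\n"));
    out
}

fn main() {
    let big = (0..200).map(|i| format!("line {i}")).collect::<Vec<_>>().join("\n");
    println!("{}", truncate_head_tail(&big));
}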
|
||||
@@ -315,6 +368,17 @@ mod tests {
|
||||
use super::*;
|
||||
use regex_lite::Regex;
|
||||
|
||||
fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
|
||||
match err {
|
||||
FunctionCallError::RespondToModel(msg) => {
|
||||
FunctionCallError::RespondToModel(format_exec_output(&msg))
|
||||
}
|
||||
FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
|
||||
FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
|
||||
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
|
||||
let pattern = truncated_message_pattern(line, total_lines);
|
||||
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
172 codex-rs/core/src/tools/orchestrator.rs Normal file
@@ -0,0 +1,172 @@
/*
|
||||
Module: orchestrator
|
||||
|
||||
Central place for approvals + sandbox selection + retry semantics. Drives a
|
||||
simple sequence for any ToolRuntime: approval → select sandbox → attempt →
|
||||
retry without sandbox on denial (no re‑approval thanks to caching).
|
||||
*/
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
|
||||
pub(crate) struct ToolOrchestrator {
|
||||
sandbox: SandboxManager,
|
||||
}
|
||||
|
||||
impl ToolOrchestrator {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
sandbox: SandboxManager::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run<Rq, Out, T>(
|
||||
&mut self,
|
||||
tool: &mut T,
|
||||
req: &Rq,
|
||||
tool_ctx: &ToolCtx<'_>,
|
||||
turn_ctx: &crate::codex::TurnContext,
|
||||
approval_policy: AskForApproval,
|
||||
) -> Result<Out, ToolError>
|
||||
where
|
||||
T: ToolRuntime<Rq, Out>,
|
||||
{
|
||||
let otel = turn_ctx.client.get_otel_event_manager();
|
||||
let otel_tn = &tool_ctx.tool_name;
|
||||
let otel_ci = &tool_ctx.call_id;
|
||||
let otel_user = codex_otel::otel_event_manager::ToolDecisionSource::User;
|
||||
let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
|
||||
|
||||
// 1) Approval
|
||||
let needs_initial_approval =
|
||||
tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
|
||||
let mut already_approved = false;
|
||||
|
||||
if needs_initial_approval {
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
sub_id: &tool_ctx.sub_id,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: None,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
}
|
||||
already_approved = true;
|
||||
} else {
|
||||
otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg);
|
||||
}
|
||||
|
||||
// 2) First attempt under the selected sandbox.
|
||||
let mut initial_sandbox = self
|
||||
.sandbox
|
||||
.select_initial(&turn_ctx.sandbox_policy, tool.sandbox_preference());
|
||||
if tool.wants_escalated_first_attempt(req) {
|
||||
initial_sandbox = crate::exec::SandboxType::None;
|
||||
}
|
||||
let initial_attempt = SandboxAttempt {
|
||||
sandbox: initial_sandbox,
|
||||
policy: &turn_ctx.sandbox_policy,
|
||||
manager: &self.sandbox,
|
||||
sandbox_cwd: &turn_ctx.cwd,
|
||||
codex_linux_sandbox_exe: turn_ctx.codex_linux_sandbox_exe.as_ref(),
|
||||
};
|
||||
|
||||
match tool.run(req, &initial_attempt, tool_ctx).await {
|
||||
Ok(out) => {
|
||||
// We have a successful initial result
|
||||
Ok(out)
|
||||
}
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
if !tool.escalate_on_failure() {
|
||||
return Err(ToolError::SandboxDenied(
|
||||
"sandbox denied and no retry".to_string(),
|
||||
));
|
||||
}
|
||||
// Under `Never`, do not retry without sandbox; surface a concise message
|
||||
// derived from the actual output (platform-agnostic).
|
||||
if matches!(approval_policy, AskForApproval::Never) {
|
||||
let msg = build_never_denied_message_from_output(output.as_ref());
|
||||
return Err(ToolError::SandboxDenied(msg));
|
||||
}
|
||||
|
||||
// Ask for approval before retrying without sandbox.
|
||||
if !tool.should_bypass_approval(approval_policy, already_approved) {
|
||||
let reason_msg = build_denial_reason_from_output(output.as_ref());
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
sub_id: &tool_ctx.sub_id,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: Some(reason_msg),
|
||||
};
|
||||
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
otel.tool_decision(otel_tn, otel_ci, decision, otel_user);
|
||||
|
||||
match decision {
|
||||
ReviewDecision::Denied | ReviewDecision::Abort => {
|
||||
return Err(ToolError::Rejected("rejected by user".to_string()));
|
||||
}
|
||||
ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
|
||||
}
|
||||
}
|
||||
|
||||
let escalated_attempt = SandboxAttempt {
|
||||
sandbox: crate::exec::SandboxType::None,
|
||||
policy: &turn_ctx.sandbox_policy,
|
||||
manager: &self.sandbox,
|
||||
sandbox_cwd: &turn_ctx.cwd,
|
||||
codex_linux_sandbox_exe: None,
|
||||
};
|
||||
|
||||
// Second attempt.
|
||||
(*tool).run(req, &escalated_attempt, tool_ctx).await
|
||||
}
|
||||
other => other,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn build_never_denied_message_from_output(output: &ExecToolCallOutput) -> String {
|
||||
let body = format!(
|
||||
"{}\n{}\n{}",
|
||||
output.stderr.text, output.stdout.text, output.aggregated_output.text
|
||||
)
|
||||
.to_lowercase();
|
||||
|
||||
let detail = if body.contains("permission denied") {
|
||||
Some("Permission denied")
|
||||
} else if body.contains("operation not permitted") {
|
||||
Some("Operation not permitted")
|
||||
} else if body.contains("read-only file system") {
|
||||
Some("Read-only file system")
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
match detail {
|
||||
Some(tag) => format!("failed in sandbox: {tag}"),
|
||||
None => "failed in sandbox".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn build_denial_reason_from_output(_output: &ExecToolCallOutput) -> String {
|
||||
// Keep approval reason terse and stable for UX/tests, but accept the
|
||||
// output so we can evolve heuristics later without touching call sites.
|
||||
"command failed; retry without sandbox?".to_string()
|
||||
}
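Note: the following is a minimal, self-contained model of the orchestrator's control flow above (approval, sandboxed attempt, optional escalated retry). All types are illustrative stand-ins; the real code threads Session, TurnContext, and otel events through this sequence.

#[derive(Clone, Copy, Debug, PartialEq)]
enum Decision { Approved, Denied }

#[derive(Debug)]
enum Outcome { Ok(String), SandboxDenied, Rejected }

// Pretend the sandbox blocks a write on the first attempt only.
fn run_once(sandboxed: bool) -> Result<String, &'static str> {
    if sandboxed { Err("denied") } else { Ok("wrote file".to_string()) }
}

fn orchestrate(initial_decision: Decision, retry_decision: Decision) -> Outcome {
    if initial_decision == Decision::Denied {
        return Outcome::Rejected;
    }
    match run_once(true) {
        Ok(out) => Outcome::Ok(out),
        Err(_) => {
            // Sandbox denial: ask again (unless cached/bypassed), then retry unsandboxed.
            if retry_decision == Decision::Denied {
                return Outcome::Rejected;
            }
            match run_once(false) {
                Ok(out) => Outcome::Ok(out),
                Err(_) => Outcome::SandboxDenied,
            }
        }
    }
}

fn main() {
    println!("{:?}", orchestrate(Decision::Approved, Decision::Approved));
    println!("{:?}", orchestrate(Decision::Approved, Decision::Denied));
}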
148 codex-rs/core/src/tools/runtimes/apply_patch.rs Normal file
@@ -0,0 +1,148 @@
//! Apply Patch runtime: executes verified patches under the orchestrator.
|
||||
//!
|
||||
//! Assumes `apply_patch` verification/approval happened upstream. Reuses that
|
||||
//! decision to avoid re-prompting, builds the self-invocation command for
|
||||
//! `codex --codex-run-as-apply-patch`, and runs under the current
|
||||
//! `SandboxAttempt` with a minimal environment.
|
||||
use crate::CODEX_APPLY_PATCH_ARG1;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ApplyPatchRequest {
|
||||
pub patch: String,
|
||||
pub cwd: PathBuf,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub user_explicitly_approved: bool,
|
||||
pub codex_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ApplyPatchRuntime;
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub(crate) struct ApprovalKey {
|
||||
patch: String,
|
||||
cwd: PathBuf,
|
||||
}
|
||||
|
||||
impl ApplyPatchRuntime {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn build_command_spec(req: &ApplyPatchRequest) -> Result<CommandSpec, ToolError> {
|
||||
use std::env;
|
||||
let exe = if let Some(path) = &req.codex_exe {
|
||||
path.clone()
|
||||
} else {
|
||||
env::current_exe()
|
||||
.map_err(|e| ToolError::Rejected(format!("failed to determine codex exe: {e}")))?
|
||||
};
|
||||
let program = exe.to_string_lossy().to_string();
|
||||
Ok(CommandSpec {
|
||||
program,
|
||||
args: vec![CODEX_APPLY_PATCH_ARG1.to_string(), req.patch.clone()],
|
||||
cwd: req.cwd.clone(),
|
||||
timeout_ms: req.timeout_ms,
|
||||
// Run apply_patch with a minimal environment for determinism and to avoid leaks.
|
||||
env: HashMap::new(),
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
|
||||
Some(crate::exec::StdoutStream {
|
||||
sub_id: ctx.sub_id.clone(),
|
||||
call_id: ctx.call_id.clone(),
|
||||
tx_event: ctx.session.get_tx_event(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Sandboxable for ApplyPatchRuntime {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference {
|
||||
SandboxablePreference::Auto
|
||||
}
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
|
||||
type ApprovalKey = ApprovalKey;
|
||||
|
||||
fn approval_key(&self, req: &ApplyPatchRequest) -> Self::ApprovalKey {
|
||||
ApprovalKey {
|
||||
patch: req.patch.clone(),
|
||||
cwd: req.cwd.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'a>(
|
||||
&'a mut self,
|
||||
req: &'a ApplyPatchRequest,
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision> {
|
||||
let key = self.approval_key(req);
|
||||
let session = ctx.session;
|
||||
let sub_id = ctx.sub_id.to_string();
|
||||
let call_id = ctx.call_id.to_string();
|
||||
let cwd = req.cwd.clone();
|
||||
let retry_reason = ctx.retry_reason.clone();
|
||||
let user_explicitly_approved = req.user_explicitly_approved;
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
if let Some(reason) = retry_reason {
|
||||
session
|
||||
.request_command_approval(
|
||||
sub_id,
|
||||
call_id,
|
||||
vec!["apply_patch".to_string()],
|
||||
cwd,
|
||||
Some(reason),
|
||||
)
|
||||
.await
|
||||
} else if user_explicitly_approved {
|
||||
ReviewDecision::ApprovedForSession
|
||||
} else {
|
||||
ReviewDecision::Approved
|
||||
}
|
||||
})
|
||||
.await
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &ApplyPatchRequest,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx<'_>,
|
||||
) -> Result<ExecToolCallOutput, ToolError> {
|
||||
let spec = Self::build_command_spec(req)?;
|
||||
let env = attempt
|
||||
.env_for(&spec)
|
||||
.map_err(|err| ToolError::Codex(err.into()))?;
|
||||
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
|
||||
.await
|
||||
.map_err(ToolError::Codex)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
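Note: a hedged sketch of the self-invocation command ApplyPatchRuntime builds. The literal value of CODEX_APPLY_PATCH_ARG1 is not shown in this diff; "--codex-run-as-apply-patch" below is an assumption taken from the module doc comment, and the struct is a stand-in for the crate's CommandSpec.

use std::collections::HashMap;
use std::path::PathBuf;

struct CommandLine {
    program: String,
    args: Vec<String>,
    cwd: PathBuf,
    env: HashMap<String, String>,
}

fn apply_patch_command(codex_exe: &str, patch: &str, cwd: PathBuf) -> CommandLine {
    CommandLine {
        program: codex_exe.to_string(),
        // Assumed flag value; the runtime uses the CODEX_APPLY_PATCH_ARG1 constant.
        args: vec!["--codex-run-as-apply-patch".to_string(), patch.to_string()],
        cwd,
        // Minimal environment for determinism, mirroring the runtime above.
        env: HashMap::new(),
    }
}

fn main() {
    let cmd = apply_patch_command("/usr/local/bin/codex", "*** Begin Patch\n*** End Patch", PathBuf::from("/tmp"));
    println!("{} {:?} (cwd: {}, env vars: {})", cmd.program, cmd.args, cmd.cwd.display(), cmd.env.len());
}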
38 codex-rs/core/src/tools/runtimes/mod.rs Normal file
@@ -0,0 +1,38 @@
/*
|
||||
Module: runtimes
|
||||
|
||||
Concrete ToolRuntime implementations for specific tools. Each runtime stays
|
||||
small and focused and reuses the orchestrator for approvals + sandbox + retry.
|
||||
*/
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
pub mod apply_patch;
|
||||
pub mod shell;
|
||||
pub mod unified_exec;
|
||||
|
||||
/// Shared helper to construct a CommandSpec from a tokenized command line.
|
||||
/// Validates that at least a program is present.
|
||||
pub(crate) fn build_command_spec(
|
||||
command: &[String],
|
||||
cwd: &Path,
|
||||
env: &HashMap<String, String>,
|
||||
timeout_ms: Option<u64>,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
) -> Result<CommandSpec, ToolError> {
|
||||
let (program, args) = command
|
||||
.split_first()
|
||||
.ok_or_else(|| ToolError::Rejected("command args are empty".to_string()))?;
|
||||
Ok(CommandSpec {
|
||||
program: program.clone(),
|
||||
args: args.to_vec(),
|
||||
cwd: cwd.to_path_buf(),
|
||||
env: env.clone(),
|
||||
timeout_ms,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
})
|
||||
}
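Note: a small usage sketch of the split-first pattern build_command_spec relies on: the first token is the program, the rest are args, and an empty command is rejected. Standalone version, not the crate's function.

fn split_command(command: &[String]) -> Result<(String, Vec<String>), String> {
    let (program, args) = command
        .split_first()
        .ok_or_else(|| "command args are empty".to_string())?;
    Ok((program.clone(), args.to_vec()))
}

fn main() {
    let cmd = vec!["ls".to_string(), "-la".to_string()];
    println!("{:?}", split_command(&cmd)); // Ok(("ls", ["-la"]))
    println!("{:?}", split_command(&[])); // Err("command args are empty")
}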
164 codex-rs/core/src/tools/runtimes/shell.rs Normal file
@@ -0,0 +1,164 @@
/*
|
||||
Runtime: shell
|
||||
|
||||
Executes shell requests under the orchestrator: asks for approval when needed,
|
||||
builds a CommandSpec, and runs it under the current SandboxAttempt.
|
||||
*/
|
||||
use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
|
||||
use crate::command_safety::is_safe_command::is_known_safe_command;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct ShellRequest {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub timeout_ms: Option<u64>,
|
||||
pub env: std::collections::HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ShellRuntime;
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub(crate) struct ApprovalKey {
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
escalated: bool,
|
||||
}
|
||||
|
||||
impl ShellRuntime {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
|
||||
fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
|
||||
Some(crate::exec::StdoutStream {
|
||||
sub_id: ctx.sub_id.clone(),
|
||||
call_id: ctx.call_id.clone(),
|
||||
tx_event: ctx.session.get_tx_event(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Sandboxable for ShellRuntime {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference {
|
||||
SandboxablePreference::Auto
|
||||
}
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Approvable<ShellRequest> for ShellRuntime {
|
||||
type ApprovalKey = ApprovalKey;
|
||||
|
||||
fn approval_key(&self, req: &ShellRequest) -> Self::ApprovalKey {
|
||||
ApprovalKey {
|
||||
command: req.command.clone(),
|
||||
cwd: req.cwd.clone(),
|
||||
escalated: req.with_escalated_permissions.unwrap_or(false),
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'a>(
|
||||
&'a mut self,
|
||||
req: &'a ShellRequest,
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision> {
|
||||
let key = self.approval_key(req);
|
||||
let command = req.command.clone();
|
||||
let cwd = req.cwd.clone();
|
||||
let reason = ctx
|
||||
.retry_reason
|
||||
.clone()
|
||||
.or_else(|| req.justification.clone());
|
||||
let session = ctx.session;
|
||||
let sub_id = ctx.sub_id.to_string();
|
||||
let call_id = ctx.call_id.to_string();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
session
|
||||
.request_command_approval(sub_id, call_id, command, cwd, reason)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
})
|
||||
}
|
||||
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
req: &ShellRequest,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
if is_known_safe_command(&req.command) {
|
||||
return false;
|
||||
}
|
||||
match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => {
|
||||
// In DangerFullAccess, only prompt if the command looks dangerous.
|
||||
if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
|
||||
return command_might_be_dangerous(&req.command);
|
||||
}
|
||||
|
||||
// In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
|
||||
// non‑escalated, non‑dangerous commands — let the sandbox enforce
|
||||
// restrictions (e.g., block network/write) without a user prompt.
|
||||
let wants_escalation = req.with_escalated_permissions.unwrap_or(false);
|
||||
if wants_escalation {
|
||||
return true;
|
||||
}
|
||||
command_might_be_dangerous(&req.command)
|
||||
}
|
||||
AskForApproval::UnlessTrusted => !is_known_safe_command(&req.command),
|
||||
}
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
|
||||
req.with_escalated_permissions.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &ShellRequest,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx<'_>,
|
||||
) -> Result<ExecToolCallOutput, ToolError> {
|
||||
let spec = build_command_spec(
|
||||
&req.command,
|
||||
&req.cwd,
|
||||
&req.env,
|
||||
req.timeout_ms,
|
||||
req.with_escalated_permissions,
|
||||
req.justification.clone(),
|
||||
)?;
|
||||
let env = attempt
|
||||
.env_for(&spec)
|
||||
.map_err(|err| ToolError::Codex(err.into()))?;
|
||||
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
|
||||
.await
|
||||
.map_err(ToolError::Codex)?;
|
||||
Ok(out)
|
||||
}
|
||||
}
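Note: a standalone restatement of ShellRuntime::wants_initial_approval's decision table. The boolean parameters stand in for is_known_safe_command and command_might_be_dangerous, which are not reproduced here; enum names are simplified.

#[allow(dead_code)]
#[derive(Clone, Copy)]
enum Approval { Never, OnFailure, OnRequest, UnlessTrusted }

#[allow(dead_code)]
#[derive(Clone, Copy)]
enum Sandbox { ReadOnly, WorkspaceWrite, DangerFullAccess }

fn wants_initial_approval(
    known_safe: bool,
    looks_dangerous: bool,
    wants_escalation: bool,
    policy: Approval,
    sandbox: Sandbox,
) -> bool {
    if known_safe {
        return false;
    }
    match policy {
        Approval::Never | Approval::OnFailure => false,
        Approval::OnRequest => {
            if matches!(sandbox, Sandbox::DangerFullAccess) {
                return looks_dangerous;
            }
            // In restricted sandboxes, only escalation requests or
            // dangerous-looking commands prompt up front.
            wants_escalation || looks_dangerous
        }
        Approval::UnlessTrusted => true,
    }
}

fn main() {
    // Dangerous-looking command under OnRequest + WorkspaceWrite: prompt.
    assert!(wants_initial_approval(false, true, false, Approval::OnRequest, Sandbox::WorkspaceWrite));
    // Known-safe command never prompts.
    assert!(!wants_initial_approval(true, false, false, Approval::UnlessTrusted, Sandbox::ReadOnly));
    println!("decision table checks passed");
}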
123 codex-rs/core/src/tools/runtimes/unified_exec.rs Normal file
@@ -0,0 +1,123 @@
/*
|
||||
Runtime: unified exec
|
||||
|
||||
Handles approval + sandbox orchestration for unified exec requests, delegating to
|
||||
the session manager to spawn PTYs once an ExecEnv is prepared.
|
||||
*/
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use crate::tools::sandboxing::ToolRuntime;
|
||||
use crate::tools::sandboxing::with_cached_approval;
|
||||
use crate::unified_exec::UnifiedExecError;
|
||||
use crate::unified_exec::UnifiedExecSession;
|
||||
use crate::unified_exec::UnifiedExecSessionManager;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use futures::future::BoxFuture;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct UnifiedExecRequest {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub env: HashMap<String, String>,
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub struct UnifiedExecApprovalKey {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
}
|
||||
|
||||
pub struct UnifiedExecRuntime<'a> {
|
||||
manager: &'a UnifiedExecSessionManager,
|
||||
}
|
||||
|
||||
impl UnifiedExecRequest {
|
||||
pub fn new(command: Vec<String>, cwd: PathBuf, env: HashMap<String, String>) -> Self {
|
||||
Self { command, cwd, env }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> UnifiedExecRuntime<'a> {
|
||||
pub fn new(manager: &'a UnifiedExecSessionManager) -> Self {
|
||||
Self { manager }
|
||||
}
|
||||
}
|
||||
|
||||
impl Sandboxable for UnifiedExecRuntime<'_> {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference {
|
||||
SandboxablePreference::Auto
|
||||
}
|
||||
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
||||
type ApprovalKey = UnifiedExecApprovalKey;
|
||||
|
||||
fn approval_key(&self, req: &UnifiedExecRequest) -> Self::ApprovalKey {
|
||||
UnifiedExecApprovalKey {
|
||||
command: req.command.clone(),
|
||||
cwd: req.cwd.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'b>(
|
||||
&'b mut self,
|
||||
req: &'b UnifiedExecRequest,
|
||||
ctx: ApprovalCtx<'b>,
|
||||
) -> BoxFuture<'b, ReviewDecision> {
|
||||
let key = self.approval_key(req);
|
||||
let session = ctx.session;
|
||||
let sub_id = ctx.sub_id.to_string();
|
||||
let call_id = ctx.call_id.to_string();
|
||||
let command = req.command.clone();
|
||||
let cwd = req.cwd.clone();
|
||||
let reason = ctx.retry_reason.clone();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
session
|
||||
.request_command_approval(sub_id, call_id, command, cwd, reason)
|
||||
.await
|
||||
})
|
||||
.await
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRuntime<'a> {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &UnifiedExecRequest,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
_ctx: &ToolCtx<'_>,
|
||||
) -> Result<UnifiedExecSession, ToolError> {
|
||||
let spec = build_command_spec(&req.command, &req.cwd, &req.env, None, None, None)
|
||||
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
|
||||
let exec_env = attempt
|
||||
.env_for(&spec)
|
||||
.map_err(|err| ToolError::Codex(err.into()))?;
|
||||
self.manager
|
||||
.open_session_with_exec_env(&exec_env)
|
||||
.await
|
||||
.map_err(|err| match err {
|
||||
UnifiedExecError::SandboxDenied { output, .. } => {
|
||||
ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
}))
|
||||
}
|
||||
other => ToolError::Rejected(other.to_string()),
|
||||
})
|
||||
}
|
||||
}
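Note: a simplified stand-in for the error mapping at the bottom of UnifiedExecRuntime::run. Sandbox denials keep their structured output so the orchestrator can retry without sandbox; anything else surfaces as a plain rejection. Types here are illustrative, not the crate's.

#[derive(Debug)]
enum PtyError { SandboxDenied { output: String }, Other(String) }

#[derive(Debug)]
enum ToolFailure { SandboxDenied { output: String }, Rejected(String) }

fn map_pty_error(err: PtyError) -> ToolFailure {
    match err {
        PtyError::SandboxDenied { output } => ToolFailure::SandboxDenied { output },
        other => ToolFailure::Rejected(format!("{other:?}")),
    }
}

fn main() {
    println!("{:?}", map_pty_error(PtyError::SandboxDenied { output: "Permission denied".into() }));
    println!("{:?}", map_pty_error(PtyError::Other("pty spawn failed".into())));
}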
190 codex-rs/core/src/tools/sandboxing.rs Normal file
@@ -0,0 +1,190 @@
//! Shared approvals and sandboxing traits used by tool runtimes.
|
||||
//!
|
||||
//! Consolidates the approval flow primitives (`ApprovalDecision`, `ApprovalStore`,
|
||||
//! `ApprovalCtx`, `Approvable`) together with the sandbox orchestration traits
|
||||
//! and helpers (`Sandboxable`, `ToolRuntime`, `SandboxAttempt`, etc.).
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::error::CodexErr;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::sandboxing::SandboxTransformError;
|
||||
use crate::state::SessionServices;
|
||||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
use std::path::Path;
|
||||
|
||||
use futures::Future;
|
||||
use futures::future::BoxFuture;
|
||||
use serde::Serialize;
|
||||
|
||||
#[derive(Clone, Default, Debug)]
|
||||
pub(crate) struct ApprovalStore {
|
||||
// Store serialized keys for generic caching across requests.
|
||||
map: HashMap<String, ReviewDecision>,
|
||||
}
|
||||
|
||||
impl ApprovalStore {
|
||||
pub fn get<K>(&self, key: &K) -> Option<ReviewDecision>
|
||||
where
|
||||
K: Serialize,
|
||||
{
|
||||
let s = serde_json::to_string(key).ok()?;
|
||||
self.map.get(&s).cloned()
|
||||
}
|
||||
|
||||
pub fn put<K>(&mut self, key: K, value: ReviewDecision)
|
||||
where
|
||||
K: Serialize,
|
||||
{
|
||||
if let Ok(s) = serde_json::to_string(&key) {
|
||||
self.map.insert(s, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) async fn with_cached_approval<K, F, Fut>(
|
||||
services: &SessionServices,
|
||||
key: K,
|
||||
fetch: F,
|
||||
) -> ReviewDecision
|
||||
where
|
||||
K: Serialize + Clone,
|
||||
F: FnOnce() -> Fut,
|
||||
Fut: Future<Output = ReviewDecision>,
|
||||
{
|
||||
{
|
||||
let store = services.tool_approvals.lock().await;
|
||||
if let Some(decision) = store.get(&key) {
|
||||
return decision;
|
||||
}
|
||||
}
|
||||
|
||||
let decision = fetch().await;
|
||||
|
||||
if matches!(decision, ReviewDecision::ApprovedForSession) {
|
||||
let mut store = services.tool_approvals.lock().await;
|
||||
store.put(key, ReviewDecision::ApprovedForSession);
|
||||
}
|
||||
|
||||
decision
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct ApprovalCtx<'a> {
|
||||
pub session: &'a Session,
|
||||
pub sub_id: &'a str,
|
||||
pub call_id: &'a str,
|
||||
pub retry_reason: Option<String>,
|
||||
}
|
||||
|
||||
pub(crate) trait Approvable<Req> {
|
||||
type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
|
||||
|
||||
fn approval_key(&self, req: &Req) -> Self::ApprovalKey;
|
||||
|
||||
/// Some tools may request to skip the sandbox on the first attempt
|
||||
/// (e.g., when the request explicitly asks for escalated permissions).
|
||||
/// Defaults to `false`.
|
||||
fn wants_escalated_first_attempt(&self, _req: &Req) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn should_bypass_approval(&self, policy: AskForApproval, already_approved: bool) -> bool {
|
||||
if already_approved {
|
||||
// Already approved earlier in this call; do not prompt again.
|
||||
return true;
|
||||
}
|
||||
matches!(policy, AskForApproval::Never)
|
||||
}
|
||||
|
||||
/// Decide whether an initial user approval should be requested before the
|
||||
/// first attempt. Defaults to the orchestrator's behavior (pre‑refactor):
|
||||
/// - Never, OnFailure: do not ask
|
||||
/// - OnRequest: ask unless sandbox policy is DangerFullAccess
|
||||
/// - UnlessTrusted: always ask
|
||||
fn wants_initial_approval(
|
||||
&self,
|
||||
_req: &Req,
|
||||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
match policy {
|
||||
AskForApproval::Never | AskForApproval::OnFailure => false,
|
||||
AskForApproval::OnRequest => !matches!(sandbox_policy, SandboxPolicy::DangerFullAccess),
|
||||
AskForApproval::UnlessTrusted => true,
|
||||
}
|
||||
}
|
||||
|
||||
fn start_approval_async<'a>(
|
||||
&'a mut self,
|
||||
req: &'a Req,
|
||||
ctx: ApprovalCtx<'a>,
|
||||
) -> BoxFuture<'a, ReviewDecision>;
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(crate) enum SandboxablePreference {
|
||||
Auto,
|
||||
#[allow(dead_code)] // Will be used by later tools.
|
||||
Require,
|
||||
#[allow(dead_code)] // Will be used by later tools.
|
||||
Forbid,
|
||||
}
|
||||
|
||||
pub(crate) trait Sandboxable {
|
||||
fn sandbox_preference(&self) -> SandboxablePreference;
|
||||
fn escalate_on_failure(&self) -> bool {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) struct ToolCtx<'a> {
|
||||
pub session: &'a Session,
|
||||
pub sub_id: String,
|
||||
pub call_id: String,
|
||||
pub tool_name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum ToolError {
|
||||
Rejected(String),
|
||||
SandboxDenied(String),
|
||||
Codex(CodexErr),
|
||||
}
|
||||
|
||||
pub(crate) trait ToolRuntime<Req, Out>: Approvable<Req> + Sandboxable {
|
||||
async fn run(
|
||||
&mut self,
|
||||
req: &Req,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx,
|
||||
) -> Result<Out, ToolError>;
|
||||
}
|
||||
|
||||
pub(crate) struct SandboxAttempt<'a> {
|
||||
pub sandbox: crate::exec::SandboxType,
|
||||
pub policy: &'a crate::protocol::SandboxPolicy,
|
||||
pub(crate) manager: &'a SandboxManager,
|
||||
pub(crate) sandbox_cwd: &'a Path,
|
||||
pub codex_linux_sandbox_exe: Option<&'a std::path::PathBuf>,
|
||||
}
|
||||
|
||||
impl<'a> SandboxAttempt<'a> {
|
||||
pub fn env_for(
|
||||
&self,
|
||||
spec: &CommandSpec,
|
||||
) -> Result<crate::sandboxing::ExecEnv, SandboxTransformError> {
|
||||
self.manager.transform(
|
||||
spec,
|
||||
self.policy,
|
||||
self.sandbox,
|
||||
self.sandbox_cwd,
|
||||
self.codex_linux_sandbox_exe,
|
||||
)
|
||||
}
|
||||
}
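Note: a minimal, synchronous sketch of the approval caching implemented by ApprovalStore and with_cached_approval above: keys are serialized requests, and only ApprovedForSession decisions are persisted for reuse. The types are simplified stand-ins without the async locking.

use std::collections::HashMap;

#[derive(Clone, Copy, Debug, PartialEq)]
enum Decision { Approved, ApprovedForSession, Denied }

#[derive(Default)]
struct ApprovalCache { map: HashMap<String, Decision> }

impl ApprovalCache {
    fn check_or_ask(&mut self, key: &str, ask: impl FnOnce() -> Decision) -> Decision {
        if let Some(d) = self.map.get(key) {
            return *d;
        }
        let decision = ask();
        if decision == Decision::ApprovedForSession {
            self.map.insert(key.to_string(), decision);
        }
        decision
    }
}

fn main() {
    let mut cache = ApprovalCache::default();
    let key = r#"{"command":["cargo","test"],"cwd":"/repo","escalated":false}"#;
    // First call prompts (simulated here) and caches the session-wide approval.
    let first = cache.check_or_ask(key, || Decision::ApprovedForSession);
    // Second call is served from the cache without prompting.
    let second = cache.check_or_ask(key, || panic!("should not prompt again"));
    assert_eq!(first, second);
    println!("cached decision reused: {second:?}");
}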
|
||||
@@ -815,12 +815,7 @@ pub(crate) fn build_specs(
|
||||
config: &ToolsConfig,
|
||||
mcp_tools: Option<HashMap<String, mcp_types::Tool>>,
|
||||
) -> ToolRegistryBuilder {
|
||||
use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
|
||||
use crate::exec_command::WRITE_STDIN_TOOL_NAME;
|
||||
use crate::exec_command::create_exec_command_tool_for_responses_api;
|
||||
use crate::exec_command::create_write_stdin_tool_for_responses_api;
|
||||
use crate::tools::handlers::ApplyPatchHandler;
|
||||
use crate::tools::handlers::ExecStreamHandler;
|
||||
use crate::tools::handlers::GrepFilesHandler;
|
||||
use crate::tools::handlers::ListDirHandler;
|
||||
use crate::tools::handlers::McpHandler;
|
||||
@@ -836,7 +831,6 @@ pub(crate) fn build_specs(
|
||||
let mut builder = ToolRegistryBuilder::new();
|
||||
|
||||
let shell_handler = Arc::new(ShellHandler);
|
||||
let exec_stream_handler = Arc::new(ExecStreamHandler);
|
||||
let unified_exec_handler = Arc::new(UnifiedExecHandler);
|
||||
let plan_handler = Arc::new(PlanHandler);
|
||||
let apply_patch_handler = Arc::new(ApplyPatchHandler);
|
||||
@@ -844,7 +838,10 @@ pub(crate) fn build_specs(
|
||||
let mcp_handler = Arc::new(McpHandler);
|
||||
let mcp_resource_handler = Arc::new(McpResourceHandler);
|
||||
|
||||
if config.experimental_unified_exec_tool {
|
||||
let use_unified_exec = config.experimental_unified_exec_tool
|
||||
|| matches!(config.shell_type, ConfigShellToolType::Streamable);
|
||||
|
||||
if use_unified_exec {
|
||||
builder.push_spec(create_unified_exec_tool());
|
||||
builder.register_handler("unified_exec", unified_exec_handler);
|
||||
} else {
|
||||
@@ -856,14 +853,7 @@ pub(crate) fn build_specs(
|
||||
builder.push_spec(ToolSpec::LocalShell {});
|
||||
}
|
||||
ConfigShellToolType::Streamable => {
|
||||
builder.push_spec(ToolSpec::Function(
|
||||
create_exec_command_tool_for_responses_api(),
|
||||
));
|
||||
builder.push_spec(ToolSpec::Function(
|
||||
create_write_stdin_tool_for_responses_api(),
|
||||
));
|
||||
builder.register_handler(EXEC_COMMAND_TOOL_NAME, exec_stream_handler.clone());
|
||||
builder.register_handler(WRITE_STDIN_TOOL_NAME, exec_stream_handler);
|
||||
// Already handled by use_unified_exec.
|
||||
}
|
||||
}
|
||||
}
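Note: a sketch of the updated tool-selection predicate in build_specs: the unified exec tool is registered either when the experimental flag is set or when the configured shell type is Streamable. The enum and field names below are illustrative stand-ins for ToolsConfig/ConfigShellToolType.

#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq)]
enum ShellToolType { Default, LocalShell, Streamable }

fn use_unified_exec(experimental_unified_exec_tool: bool, shell_type: ShellToolType) -> bool {
    experimental_unified_exec_tool || shell_type == ShellToolType::Streamable
}

fn main() {
    assert!(use_unified_exec(false, ShellToolType::Streamable));
    assert!(use_unified_exec(true, ShellToolType::Default));
    assert!(!use_unified_exec(false, ShellToolType::LocalShell));
    println!("selection checks passed");
}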
|
||||
|
||||
@@ -1,22 +1,29 @@
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub(crate) enum UnifiedExecError {
|
||||
#[error("Failed to create unified exec session: {pty_error}")]
|
||||
CreateSession {
|
||||
#[source]
|
||||
pty_error: anyhow::Error,
|
||||
},
|
||||
#[error("Failed to create unified exec session: {message}")]
|
||||
CreateSession { message: String },
|
||||
#[error("Unknown session id {session_id}")]
|
||||
UnknownSessionId { session_id: i32 },
|
||||
#[error("failed to write to stdin")]
|
||||
WriteToStdin,
|
||||
#[error("missing command line for unified exec request")]
|
||||
MissingCommandLine,
|
||||
#[error("Command denied by sandbox: {message}")]
|
||||
SandboxDenied {
|
||||
message: String,
|
||||
output: ExecToolCallOutput,
|
||||
},
|
||||
}
|
||||
|
||||
impl UnifiedExecError {
|
||||
pub(crate) fn create_session(error: anyhow::Error) -> Self {
|
||||
Self::CreateSession { pty_error: error }
|
||||
pub(crate) fn create_session(message: String) -> Self {
|
||||
Self::CreateSession { message }
|
||||
}
|
||||
|
||||
pub(crate) fn sandbox_denied(message: String, output: ExecToolCallOutput) -> Self {
|
||||
Self::SandboxDenied { message, output }
|
||||
}
|
||||
}
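Note: a std-only sketch mirroring the reworked UnifiedExecError above: CreateSession now carries a plain message instead of an anyhow source, and SandboxDenied keeps the captured output so callers can surface or retry it. A String stands in for ExecToolCallOutput, and the thiserror derive is replaced by a hand-written Display.

use std::fmt;

#[allow(dead_code)]
#[derive(Debug)]
enum UnifiedExecErrorSketch {
    CreateSession { message: String },
    UnknownSessionId { session_id: i32 },
    SandboxDenied { message: String, output: String },
}

impl fmt::Display for UnifiedExecErrorSketch {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::CreateSession { message } => write!(f, "Failed to create unified exec session: {message}"),
            Self::UnknownSessionId { session_id } => write!(f, "Unknown session id {session_id}"),
            Self::SandboxDenied { message, .. } => write!(f, "Command denied by sandbox: {message}"),
        }
    }
}

fn main() {
    let err = UnifiedExecErrorSketch::SandboxDenied {
        message: "write blocked".into(),
        output: "Permission denied".into(),
    };
    println!("{err}");
}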
|
||||
|
||||
@@ -1,36 +1,55 @@
|
||||
use portable_pty::CommandBuilder;
|
||||
use portable_pty::PtySize;
|
||||
use portable_pty::native_pty_system;
|
||||
use std::collections::HashMap;
|
||||
use std::collections::VecDeque;
|
||||
use std::io::ErrorKind;
|
||||
use std::io::Read;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex as StdMutex;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::AtomicI32;
|
||||
use std::sync::atomic::Ordering;
|
||||
use tokio::sync::Mutex;
|
||||
use tokio::sync::Notify;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::task::JoinHandle;
|
||||
use tokio::time::Duration;
|
||||
use tokio::time::Instant;
|
||||
//! Unified Exec: interactive PTY execution orchestrated with approvals + sandboxing.
|
||||
//!
|
||||
//! Responsibilities
|
||||
//! - Manages interactive PTY sessions (create, reuse, buffer output with caps).
|
||||
//! - Uses the shared ToolOrchestrator to handle approval, sandbox selection, and
|
||||
//! retry semantics in a single, descriptive flow.
|
||||
//! - Spawns the PTY from a sandbox‑transformed `ExecEnv`; on sandbox denial,
|
||||
//! retries without sandbox when policy allows (no re‑prompt thanks to caching).
|
||||
//! - Uses the shared `is_likely_sandbox_denied` heuristic to keep denial messages
|
||||
//! consistent with other exec paths.
|
||||
//!
|
||||
//! Flow at a glance (open session)
|
||||
//! 1) Build a small request `{ command, cwd }`.
|
||||
//! 2) Orchestrator: approval (bypass/cache/prompt) → select sandbox → run.
|
||||
//! 3) Runtime: transform `CommandSpec` → `ExecEnv` → spawn PTY.
|
||||
//! 4) If denial, orchestrator retries with `SandboxType::None`.
|
||||
//! 5) Session is returned with streaming output + metadata.
|
||||
//!
|
||||
//! This keeps policy logic and user interaction centralized while the PTY/session
|
||||
//! concerns remain isolated here. The implementation is split between:
|
||||
//! - `session.rs`: PTY session lifecycle + output buffering.
|
||||
//! - `session_manager.rs`: orchestration (approvals, sandboxing, reuse) and request handling.
|
||||
|
||||
use crate::exec_command::ExecCommandSession;
|
||||
use crate::truncate::truncate_middle;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::AtomicI32;
|
||||
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
|
||||
mod errors;
|
||||
mod session;
|
||||
mod session_manager;
|
||||
|
||||
pub(crate) use errors::UnifiedExecError;
|
||||
pub(crate) use session::UnifiedExecSession;
|
||||
|
||||
const DEFAULT_TIMEOUT_MS: u64 = 1_000;
|
||||
const MAX_TIMEOUT_MS: u64 = 60_000;
|
||||
const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 128 * 1024; // 128 KiB
|
||||
|
||||
pub(crate) struct UnifiedExecContext<'a> {
|
||||
pub session: &'a Session,
|
||||
pub turn: &'a TurnContext,
|
||||
pub sub_id: &'a str,
|
||||
pub call_id: &'a str,
|
||||
pub session_id: Option<i32>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct UnifiedExecRequest<'a> {
|
||||
pub session_id: Option<i32>,
|
||||
pub input_chunks: &'a [String],
|
||||
pub timeout_ms: Option<u64>,
|
||||
}
|
||||
@@ -44,379 +63,60 @@ pub(crate) struct UnifiedExecResult {
|
||||
#[derive(Debug, Default)]
pub(crate) struct UnifiedExecSessionManager {
    next_session_id: AtomicI32,
    sessions: Mutex<HashMap<i32, ManagedUnifiedExecSession>>,
}

#[derive(Debug)]
struct ManagedUnifiedExecSession {
    session: ExecCommandSession,
    output_buffer: OutputBuffer,
    /// Notifies waiters whenever new output has been appended to
    /// `output_buffer`, allowing clients to poll for fresh data.
    output_notify: Arc<Notify>,
    output_task: JoinHandle<()>,
}

#[derive(Debug, Default)]
struct OutputBufferState {
    chunks: VecDeque<Vec<u8>>,
    total_bytes: usize,
}

impl OutputBufferState {
    fn push_chunk(&mut self, chunk: Vec<u8>) {
        self.total_bytes = self.total_bytes.saturating_add(chunk.len());
        self.chunks.push_back(chunk);

        let mut excess = self
            .total_bytes
            .saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES);

        while excess > 0 {
            match self.chunks.front_mut() {
                Some(front) if excess >= front.len() => {
                    excess -= front.len();
                    self.total_bytes = self.total_bytes.saturating_sub(front.len());
                    self.chunks.pop_front();
                }
                Some(front) => {
                    front.drain(..excess);
                    self.total_bytes = self.total_bytes.saturating_sub(excess);
                    break;
                }
                None => break,
            }
        }
    }

    fn drain(&mut self) -> Vec<Vec<u8>> {
        let drained: Vec<Vec<u8>> = self.chunks.drain(..).collect();
        self.total_bytes = 0;
        drained
    }
}

type OutputBuffer = Arc<Mutex<OutputBufferState>>;
type OutputHandles = (OutputBuffer, Arc<Notify>);

impl ManagedUnifiedExecSession {
    fn new(
        session: ExecCommandSession,
        initial_output_rx: tokio::sync::broadcast::Receiver<Vec<u8>>,
    ) -> Self {
        let output_buffer = Arc::new(Mutex::new(OutputBufferState::default()));
        let output_notify = Arc::new(Notify::new());
        let mut receiver = initial_output_rx;
        let buffer_clone = Arc::clone(&output_buffer);
        let notify_clone = Arc::clone(&output_notify);
        let output_task = tokio::spawn(async move {
            loop {
                match receiver.recv().await {
                    Ok(chunk) => {
                        let mut guard = buffer_clone.lock().await;
                        guard.push_chunk(chunk);
                        drop(guard);
                        notify_clone.notify_waiters();
                    }
                    // If we lag behind the broadcast buffer, skip missed
                    // messages but keep the task alive to continue streaming.
                    Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => {
                        continue;
                    }
                    // When the sender closes, exit the task.
                    Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
                }
            }
        });

        Self {
            session,
            output_buffer,
            output_notify,
            output_task,
        }
    }

    fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
        self.session.writer_sender()
    }

    fn output_handles(&self) -> OutputHandles {
        (
            Arc::clone(&self.output_buffer),
            Arc::clone(&self.output_notify),
        )
    }

    fn has_exited(&self) -> bool {
        self.session.has_exited()
    }
}

impl Drop for ManagedUnifiedExecSession {
    fn drop(&mut self) {
        self.output_task.abort();
    }
}

impl UnifiedExecSessionManager {
    pub async fn handle_request(
        &self,
        request: UnifiedExecRequest<'_>,
    ) -> Result<UnifiedExecResult, UnifiedExecError> {
        let (timeout_ms, timeout_warning) = match request.timeout_ms {
            Some(requested) if requested > MAX_TIMEOUT_MS => (
                MAX_TIMEOUT_MS,
                Some(format!(
                    "Warning: requested timeout {requested}ms exceeds maximum of {MAX_TIMEOUT_MS}ms; clamping to {MAX_TIMEOUT_MS}ms.\n"
                )),
            ),
            Some(requested) => (requested, None),
            None => (DEFAULT_TIMEOUT_MS, None),
        };

        let mut new_session: Option<ManagedUnifiedExecSession> = None;
        let session_id;
        let writer_tx;
        let output_buffer;
        let output_notify;

        if let Some(existing_id) = request.session_id {
            let mut sessions = self.sessions.lock().await;
            match sessions.get(&existing_id) {
                Some(session) => {
                    if session.has_exited() {
                        sessions.remove(&existing_id);
                        return Err(UnifiedExecError::UnknownSessionId {
                            session_id: existing_id,
                        });
                    }
                    let (buffer, notify) = session.output_handles();
                    session_id = existing_id;
                    writer_tx = session.writer_sender();
                    output_buffer = buffer;
                    output_notify = notify;
                }
                None => {
                    return Err(UnifiedExecError::UnknownSessionId {
                        session_id: existing_id,
                    });
                }
            }
            drop(sessions);
        } else {
            let command = request.input_chunks.to_vec();
            let new_id = self.next_session_id.fetch_add(1, Ordering::SeqCst);
            let (session, initial_output_rx) = create_unified_exec_session(&command).await?;
            let managed_session = ManagedUnifiedExecSession::new(session, initial_output_rx);
            let (buffer, notify) = managed_session.output_handles();
            writer_tx = managed_session.writer_sender();
            output_buffer = buffer;
            output_notify = notify;
            session_id = new_id;
            new_session = Some(managed_session);
        };

        if request.session_id.is_some() {
            let joined_input = request.input_chunks.join(" ");
            if !joined_input.is_empty() && writer_tx.send(joined_input.into_bytes()).await.is_err()
            {
                return Err(UnifiedExecError::WriteToStdin);
            }
        }

        let mut collected: Vec<u8> = Vec::with_capacity(4096);
        let start = Instant::now();
        let deadline = start + Duration::from_millis(timeout_ms);

        loop {
            let drained_chunks;
            let mut wait_for_output = None;
            {
                let mut guard = output_buffer.lock().await;
                drained_chunks = guard.drain();
                if drained_chunks.is_empty() {
                    wait_for_output = Some(output_notify.notified());
                }
            }

            if drained_chunks.is_empty() {
                let remaining = deadline.saturating_duration_since(Instant::now());
                if remaining == Duration::ZERO {
                    break;
                }

                let notified = wait_for_output.unwrap_or_else(|| output_notify.notified());
                tokio::pin!(notified);
                tokio::select! {
                    _ = &mut notified => {}
                    _ = tokio::time::sleep(remaining) => break,
                }
                continue;
            }

            for chunk in drained_chunks {
                collected.extend_from_slice(&chunk);
            }

            if Instant::now() >= deadline {
                break;
            }
        }

        let (output, _maybe_tokens) = truncate_middle(
            &String::from_utf8_lossy(&collected),
            UNIFIED_EXEC_OUTPUT_MAX_BYTES,
        );
        let output = if let Some(warning) = timeout_warning {
            format!("{warning}{output}")
        } else {
            output
        };

        let should_store_session = if let Some(session) = new_session.as_ref() {
            !session.has_exited()
        } else if request.session_id.is_some() {
            let mut sessions = self.sessions.lock().await;
            if let Some(existing) = sessions.get(&session_id) {
                if existing.has_exited() {
                    sessions.remove(&session_id);
                    false
                } else {
                    true
                }
            } else {
                false
            }
        } else {
            true
        };

        if should_store_session {
            if let Some(session) = new_session {
                self.sessions.lock().await.insert(session_id, session);
            }
            Ok(UnifiedExecResult {
                session_id: Some(session_id),
                output,
            })
        } else {
            Ok(UnifiedExecResult {
                session_id: None,
                output,
            })
        }
    }
}

async fn create_unified_exec_session(
    command: &[String],
) -> Result<
    (
        ExecCommandSession,
        tokio::sync::broadcast::Receiver<Vec<u8>>,
    ),
    UnifiedExecError,
> {
    if command.is_empty() {
        return Err(UnifiedExecError::MissingCommandLine);
    }

    let pty_system = native_pty_system();

    let pair = pty_system
        .openpty(PtySize {
            rows: 24,
            cols: 80,
            pixel_width: 0,
            pixel_height: 0,
        })
        .map_err(UnifiedExecError::create_session)?;

    // Safe thanks to the check at the top of the function.
    let mut command_builder = CommandBuilder::new(command[0].clone());
    for arg in &command[1..] {
        command_builder.arg(arg);
    }

    let mut child = pair
        .slave
        .spawn_command(command_builder)
        .map_err(UnifiedExecError::create_session)?;
    let killer = child.clone_killer();

    let (writer_tx, mut writer_rx) = mpsc::channel::<Vec<u8>>(128);
    let (output_tx, _) = tokio::sync::broadcast::channel::<Vec<u8>>(256);

    let mut reader = pair
        .master
        .try_clone_reader()
        .map_err(UnifiedExecError::create_session)?;
    let output_tx_clone = output_tx.clone();
    let reader_handle = tokio::task::spawn_blocking(move || {
        let mut buf = [0u8; 8192];
        loop {
            match reader.read(&mut buf) {
                Ok(0) => break,
                Ok(n) => {
                    let _ = output_tx_clone.send(buf[..n].to_vec());
                }
                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(ref e) if e.kind() == ErrorKind::WouldBlock => {
                    std::thread::sleep(Duration::from_millis(5));
                    continue;
                }
                Err(_) => break,
            }
        }
    });

    let writer = pair
        .master
        .take_writer()
        .map_err(UnifiedExecError::create_session)?;
    let writer = Arc::new(StdMutex::new(writer));
    let writer_handle = tokio::spawn({
        let writer = writer.clone();
        async move {
            while let Some(bytes) = writer_rx.recv().await {
                let writer = writer.clone();
                let _ = tokio::task::spawn_blocking(move || {
                    if let Ok(mut guard) = writer.lock() {
                        use std::io::Write;
                        let _ = guard.write_all(&bytes);
                        let _ = guard.flush();
                    }
                })
                .await;
            }
        }
    });

    let exit_status = Arc::new(AtomicBool::new(false));
    let wait_exit_status = Arc::clone(&exit_status);
    let wait_handle = tokio::task::spawn_blocking(move || {
        let _ = child.wait();
        wait_exit_status.store(true, Ordering::SeqCst);
    });

    let (session, initial_output_rx) = ExecCommandSession::new(
        writer_tx,
        output_tx,
        killer,
        reader_handle,
        writer_handle,
        wait_handle,
        exit_status,
    );
    Ok((session, initial_output_rx))
    sessions: Mutex<HashMap<i32, session::UnifiedExecSession>>,
}

#[cfg(test)]
#[cfg(unix)]
mod tests {
    use super::*;
    #[cfg(unix)]

    use crate::codex::Session;
    use crate::codex::TurnContext;
    use crate::codex::make_session_and_context;
    use crate::protocol::AskForApproval;
    use crate::protocol::SandboxPolicy;
    use core_test_support::skip_if_sandbox;
    use std::sync::Arc;
    use tokio::time::Duration;

    use super::session::OutputBufferState;

    fn test_session_and_turn() -> (Arc<Session>, Arc<TurnContext>) {
        let (session, mut turn) = make_session_and_context();
        turn.approval_policy = AskForApproval::Never;
        turn.sandbox_policy = SandboxPolicy::DangerFullAccess;
        (Arc::new(session), Arc::new(turn))
    }

    async fn run_unified_exec_request(
        session: &Arc<Session>,
        turn: &Arc<TurnContext>,
        session_id: Option<i32>,
        input: Vec<String>,
        timeout_ms: Option<u64>,
    ) -> Result<UnifiedExecResult, UnifiedExecError> {
        let request_input = input;
        let request = UnifiedExecRequest {
            input_chunks: &request_input,
            timeout_ms,
        };

        session
            .services
            .unified_exec_manager
            .handle_request(
                request,
                UnifiedExecContext {
                    session,
                    turn: turn.as_ref(),
                    sub_id: "sub",
                    call_id: "call",
                    session_id,
                },
            )
            .await
    }

    #[test]
    fn push_chunk_trims_only_excess_bytes() {
@@ -426,167 +126,170 @@ mod tests {
        buffer.push_chunk(vec![b'c']);

        assert_eq!(buffer.total_bytes, UNIFIED_EXEC_OUTPUT_MAX_BYTES);
        assert_eq!(buffer.chunks.len(), 3);
        let snapshot = buffer.snapshot();
        assert_eq!(snapshot.len(), 3);
        assert_eq!(
            buffer.chunks.front().unwrap().len(),
            snapshot.first().unwrap().len(),
            UNIFIED_EXEC_OUTPUT_MAX_BYTES - 2
        );
        assert_eq!(buffer.chunks.pop_back().unwrap(), vec![b'c']);
        assert_eq!(buffer.chunks.pop_back().unwrap(), vec![b'b']);
        assert_eq!(snapshot.get(2).unwrap(), &vec![b'c']);
        assert_eq!(snapshot.get(1).unwrap(), &vec![b'b']);
    }

    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn unified_exec_persists_across_requests() -> Result<(), UnifiedExecError> {
    async fn unified_exec_persists_across_requests() -> anyhow::Result<()> {
        skip_if_sandbox!(Ok(()));

        let manager = UnifiedExecSessionManager::default();
        let (session, turn) = test_session_and_turn();

        let open_shell = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["bash".to_string(), "-i".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let open_shell = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["bash".to_string(), "-i".to_string()],
            Some(2_500),
        )
        .await?;
        let session_id = open_shell.session_id.expect("expected session_id");

        manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &[
                    "export".to_string(),
                    "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
                ],
                timeout_ms: Some(2_500),
            })
            .await?;
        run_unified_exec_request(
            &session,
            &turn,
            Some(session_id),
            vec![
                "export".to_string(),
                "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
            ],
            Some(2_500),
        )
        .await?;

        let out_2 = manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let out_2 = run_unified_exec_request(
            &session,
            &turn,
            Some(session_id),
            vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
            Some(2_500),
        )
        .await?;
        assert!(out_2.output.contains("codex"));

        Ok(())
    }

    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn multi_unified_exec_sessions() -> Result<(), UnifiedExecError> {
    async fn multi_unified_exec_sessions() -> anyhow::Result<()> {
        skip_if_sandbox!(Ok(()));

        let manager = UnifiedExecSessionManager::default();
        let (session, turn) = test_session_and_turn();

        let shell_a = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["/bin/bash".to_string(), "-i".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let shell_a = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["/bin/bash".to_string(), "-i".to_string()],
            Some(2_500),
        )
        .await?;
        let session_a = shell_a.session_id.expect("expected session id");

        manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_a),
                input_chunks: &["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        run_unified_exec_request(
            &session,
            &turn,
            Some(session_a),
            vec!["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()],
            Some(2_500),
        )
        .await?;

        let out_2 = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &[
                    "echo".to_string(),
                    "$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(),
                ],
                timeout_ms: Some(2_500),
            })
            .await?;
        let out_2 = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec![
                "echo".to_string(),
                "$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(),
            ],
            Some(2_500),
        )
        .await?;
        assert!(!out_2.output.contains("codex"));

        let out_3 = manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_a),
                input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let out_3 = run_unified_exec_request(
            &session,
            &turn,
            Some(session_a),
            vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
            Some(2_500),
        )
        .await?;
        assert!(out_3.output.contains("codex"));

        Ok(())
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn unified_exec_timeouts() -> Result<(), UnifiedExecError> {
    async fn unified_exec_timeouts() -> anyhow::Result<()> {
        skip_if_sandbox!(Ok(()));

        let manager = UnifiedExecSessionManager::default();
        let (session, turn) = test_session_and_turn();

        let open_shell = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["bash".to_string(), "-i".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let open_shell = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["bash".to_string(), "-i".to_string()],
            Some(2_500),
        )
        .await?;
        let session_id = open_shell.session_id.expect("expected session id");

        manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &[
                    "export".to_string(),
                    "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
                ],
                timeout_ms: Some(2_500),
            })
            .await?;
        run_unified_exec_request(
            &session,
            &turn,
            Some(session_id),
            vec![
                "export".to_string(),
                "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(),
            ],
            Some(2_500),
        )
        .await?;

        let out_2 = manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
                timeout_ms: Some(10),
            })
            .await?;
        let out_2 = run_unified_exec_request(
            &session,
            &turn,
            Some(session_id),
            vec!["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()],
            Some(10),
        )
        .await?;
        assert!(!out_2.output.contains("codex"));

        tokio::time::sleep(Duration::from_secs(7)).await;

        let empty = Vec::new();
        let out_3 = manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &empty,
                timeout_ms: Some(100),
            })
            .await?;
        let out_3 =
            run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100))
                .await?;

        assert!(out_3.output.contains("codex"));

        Ok(())
    }

    #[cfg(unix)]
    #[tokio::test]
    #[ignore] // Ignored until we have a better way to test this.
    async fn requests_with_large_timeout_are_capped() -> Result<(), UnifiedExecError> {
        let manager = UnifiedExecSessionManager::default();
    async fn requests_with_large_timeout_are_capped() -> anyhow::Result<()> {
        let (session, turn) = test_session_and_turn();

        let result = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["echo".to_string(), "codex".to_string()],
                timeout_ms: Some(120_000),
            })
            .await?;
        let result = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["echo".to_string(), "codex".to_string()],
            Some(120_000),
        )
        .await?;

        assert!(result.output.starts_with(
            "Warning: requested timeout 120000ms exceeds maximum of 60000ms; clamping to 60000ms.\n"
@@ -596,61 +299,66 @@ mod tests {
        Ok(())
    }

    #[cfg(unix)]
    #[tokio::test]
    #[ignore] // Ignored until we have a better way to test this.
    async fn completed_commands_do_not_persist_sessions() -> Result<(), UnifiedExecError> {
        let manager = UnifiedExecSessionManager::default();
        let result = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["/bin/echo".to_string(), "codex".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
    async fn completed_commands_do_not_persist_sessions() -> anyhow::Result<()> {
        let (session, turn) = test_session_and_turn();
        let result = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["/bin/echo".to_string(), "codex".to_string()],
            Some(2_500),
        )
        .await?;

        assert!(result.session_id.is_none());
        assert!(result.output.contains("codex"));

        assert!(manager.sessions.lock().await.is_empty());
        assert!(
            session
                .services
                .unified_exec_manager
                .sessions
                .lock()
                .await
                .is_empty()
        );

        Ok(())
    }

    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
    async fn reusing_completed_session_returns_unknown_session() -> Result<(), UnifiedExecError> {
    async fn reusing_completed_session_returns_unknown_session() -> anyhow::Result<()> {
        skip_if_sandbox!(Ok(()));

        let manager = UnifiedExecSessionManager::default();
        let (session, turn) = test_session_and_turn();

        let open_shell = manager
            .handle_request(UnifiedExecRequest {
                session_id: None,
                input_chunks: &["/bin/bash".to_string(), "-i".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        let open_shell = run_unified_exec_request(
            &session,
            &turn,
            None,
            vec!["/bin/bash".to_string(), "-i".to_string()],
            Some(2_500),
        )
        .await?;
        let session_id = open_shell.session_id.expect("expected session id");

        manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &["exit\n".to_string()],
                timeout_ms: Some(2_500),
            })
            .await?;
        run_unified_exec_request(
            &session,
            &turn,
            Some(session_id),
            vec!["exit\n".to_string()],
            Some(2_500),
        )
        .await?;

        tokio::time::sleep(Duration::from_millis(200)).await;

        let err = manager
            .handle_request(UnifiedExecRequest {
                session_id: Some(session_id),
                input_chunks: &[],
                timeout_ms: Some(100),
            })
            .await
            .expect_err("expected unknown session error");
        let err =
            run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100))
                .await
                .expect_err("expected unknown session error");

        match err {
            UnifiedExecError::UnknownSessionId { session_id: err_id } => {
@@ -659,7 +367,15 @@ mod tests {
            other => panic!("expected UnknownSessionId, got {other:?}"),
        }

        assert!(!manager.sessions.lock().await.contains_key(&session_id));
        assert!(
            !session
                .services
                .unified_exec_manager
                .sessions
                .lock()
                .await
                .contains_key(&session_id)
        );

        Ok(())
    }

217 codex-rs/core/src/unified_exec/session.rs Normal file
@@ -0,0 +1,217 @@
#![allow(clippy::module_inception)]

use std::collections::VecDeque;
use std::sync::Arc;

use tokio::sync::Mutex;
use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::sync::oneshot::error::TryRecvError;
use tokio::task::JoinHandle;
use tokio::time::Duration;

use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StreamOutput;
use crate::exec::is_likely_sandbox_denied;
use crate::truncate::truncate_middle;
use codex_utils_pty::ExecCommandSession;
use codex_utils_pty::SpawnedPty;

use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
use super::UnifiedExecError;

#[derive(Debug, Default)]
pub(crate) struct OutputBufferState {
    chunks: VecDeque<Vec<u8>>,
    pub(crate) total_bytes: usize,
}

impl OutputBufferState {
    pub(super) fn push_chunk(&mut self, chunk: Vec<u8>) {
        self.total_bytes = self.total_bytes.saturating_add(chunk.len());
        self.chunks.push_back(chunk);

        let mut excess = self
            .total_bytes
            .saturating_sub(UNIFIED_EXEC_OUTPUT_MAX_BYTES);

        while excess > 0 {
            match self.chunks.front_mut() {
                Some(front) if excess >= front.len() => {
                    excess -= front.len();
                    self.total_bytes = self.total_bytes.saturating_sub(front.len());
                    self.chunks.pop_front();
                }
                Some(front) => {
                    front.drain(..excess);
                    self.total_bytes = self.total_bytes.saturating_sub(excess);
                    break;
                }
                None => break,
            }
        }
    }

    pub(super) fn drain(&mut self) -> Vec<Vec<u8>> {
        let drained: Vec<Vec<u8>> = self.chunks.drain(..).collect();
        self.total_bytes = 0;
        drained
    }

    pub(super) fn snapshot(&self) -> Vec<Vec<u8>> {
        self.chunks.iter().cloned().collect()
    }
}

pub(crate) type OutputBuffer = Arc<Mutex<OutputBufferState>>;
pub(crate) type OutputHandles = (OutputBuffer, Arc<Notify>);

#[derive(Debug)]
pub(crate) struct UnifiedExecSession {
    session: ExecCommandSession,
    output_buffer: OutputBuffer,
    output_notify: Arc<Notify>,
    output_task: JoinHandle<()>,
    sandbox_type: SandboxType,
}

impl UnifiedExecSession {
    pub(super) fn new(
        session: ExecCommandSession,
        initial_output_rx: tokio::sync::broadcast::Receiver<Vec<u8>>,
        sandbox_type: SandboxType,
    ) -> Self {
        let output_buffer = Arc::new(Mutex::new(OutputBufferState::default()));
        let output_notify = Arc::new(Notify::new());
        let mut receiver = initial_output_rx;
        let buffer_clone = Arc::clone(&output_buffer);
        let notify_clone = Arc::clone(&output_notify);
        let output_task = tokio::spawn(async move {
            loop {
                match receiver.recv().await {
                    Ok(chunk) => {
                        let mut guard = buffer_clone.lock().await;
                        guard.push_chunk(chunk);
                        drop(guard);
                        notify_clone.notify_waiters();
                    }
                    Err(tokio::sync::broadcast::error::RecvError::Lagged(_)) => continue,
                    Err(tokio::sync::broadcast::error::RecvError::Closed) => break,
                }
            }
        });

        Self {
            session,
            output_buffer,
            output_notify,
            output_task,
            sandbox_type,
        }
    }

    pub(super) fn writer_sender(&self) -> mpsc::Sender<Vec<u8>> {
        self.session.writer_sender()
    }

    pub(super) fn output_handles(&self) -> OutputHandles {
        (
            Arc::clone(&self.output_buffer),
            Arc::clone(&self.output_notify),
        )
    }

    pub(super) fn has_exited(&self) -> bool {
        self.session.has_exited()
    }

    pub(super) fn exit_code(&self) -> Option<i32> {
        self.session.exit_code()
    }

    async fn snapshot_output(&self) -> Vec<Vec<u8>> {
        let guard = self.output_buffer.lock().await;
        guard.snapshot()
    }

    fn sandbox_type(&self) -> SandboxType {
        self.sandbox_type
    }

    pub(super) async fn check_for_sandbox_denial(&self) -> Result<(), UnifiedExecError> {
        if self.sandbox_type() == SandboxType::None || !self.has_exited() {
            return Ok(());
        }

        let _ =
            tokio::time::timeout(Duration::from_millis(20), self.output_notify.notified()).await;

        let collected_chunks = self.snapshot_output().await;
        let mut aggregated: Vec<u8> = Vec::new();
        for chunk in collected_chunks {
            aggregated.extend_from_slice(&chunk);
        }
        let aggregated_text = String::from_utf8_lossy(&aggregated).to_string();
        let exit_code = self.exit_code().unwrap_or(-1);

        let exec_output = ExecToolCallOutput {
            exit_code,
            stdout: StreamOutput::new(aggregated_text.clone()),
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new(aggregated_text.clone()),
            duration: Duration::ZERO,
            timed_out: false,
        };

        if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) {
            let (snippet, _) = truncate_middle(&aggregated_text, UNIFIED_EXEC_OUTPUT_MAX_BYTES);
            let message = if snippet.is_empty() {
                format!("exit code {exit_code}")
            } else {
                snippet
            };
            return Err(UnifiedExecError::sandbox_denied(message, exec_output));
        }

        Ok(())
    }

    pub(super) async fn from_spawned(
        spawned: SpawnedPty,
        sandbox_type: SandboxType,
    ) -> Result<Self, UnifiedExecError> {
        let SpawnedPty {
            session,
            output_rx,
            mut exit_rx,
        } = spawned;
        let managed = Self::new(session, output_rx, sandbox_type);

        let exit_ready = match exit_rx.try_recv() {
            Ok(_) | Err(TryRecvError::Closed) => true,
            Err(TryRecvError::Empty) => false,
        };

        if exit_ready {
            managed.check_for_sandbox_denial().await?;
            return Ok(managed);
        }

        tokio::pin!(exit_rx);
        if tokio::time::timeout(Duration::from_millis(50), &mut exit_rx)
            .await
            .is_ok()
        {
            managed.check_for_sandbox_denial().await?;
        }

        Ok(managed)
    }
}

impl Drop for UnifiedExecSession {
    fn drop(&mut self) {
        self.output_task.abort();
    }
}
288 codex-rs/core/src/unified_exec/session_manager.rs Normal file
@@ -0,0 +1,288 @@
use std::sync::Arc;

use tokio::sync::Notify;
use tokio::sync::mpsc;
use tokio::time::Duration;
use tokio::time::Instant;

use crate::exec_env::create_env;
use crate::sandboxing::ExecEnv;
use crate::tools::orchestrator::ToolOrchestrator;
use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
use crate::tools::sandboxing::ToolCtx;
use crate::truncate::truncate_middle;

use super::DEFAULT_TIMEOUT_MS;
use super::MAX_TIMEOUT_MS;
use super::UNIFIED_EXEC_OUTPUT_MAX_BYTES;
use super::UnifiedExecContext;
use super::UnifiedExecError;
use super::UnifiedExecRequest;
use super::UnifiedExecResult;
use super::UnifiedExecSessionManager;
use super::session::OutputBuffer;
use super::session::UnifiedExecSession;

pub(super) struct SessionAcquisition {
    pub(super) session_id: i32,
    pub(super) writer_tx: mpsc::Sender<Vec<u8>>,
    pub(super) output_buffer: OutputBuffer,
    pub(super) output_notify: Arc<Notify>,
    pub(super) new_session: Option<UnifiedExecSession>,
    pub(super) reuse_requested: bool,
}

impl UnifiedExecSessionManager {
    pub(super) async fn acquire_session(
        &self,
        request: &UnifiedExecRequest<'_>,
        context: &UnifiedExecContext<'_>,
    ) -> Result<SessionAcquisition, UnifiedExecError> {
        if let Some(existing_id) = context.session_id {
            let mut sessions = self.sessions.lock().await;
            match sessions.get(&existing_id) {
                Some(session) => {
                    if session.has_exited() {
                        sessions.remove(&existing_id);
                        return Err(UnifiedExecError::UnknownSessionId {
                            session_id: existing_id,
                        });
                    }
                    let (buffer, notify) = session.output_handles();
                    let writer_tx = session.writer_sender();
                    Ok(SessionAcquisition {
                        session_id: existing_id,
                        writer_tx,
                        output_buffer: buffer,
                        output_notify: notify,
                        new_session: None,
                        reuse_requested: true,
                    })
                }
                None => Err(UnifiedExecError::UnknownSessionId {
                    session_id: existing_id,
                }),
            }
        } else {
            let new_id = self
                .next_session_id
                .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
            let managed_session = self
                .open_session_with_sandbox(request.input_chunks.to_vec(), context)
                .await?;
            let (buffer, notify) = managed_session.output_handles();
            let writer_tx = managed_session.writer_sender();
            Ok(SessionAcquisition {
                session_id: new_id,
                writer_tx,
                output_buffer: buffer,
                output_notify: notify,
                new_session: Some(managed_session),
                reuse_requested: false,
            })
        }
    }

    pub(crate) async fn open_session_with_exec_env(
        &self,
        env: &ExecEnv,
    ) -> Result<UnifiedExecSession, UnifiedExecError> {
        let (program, args) = env
            .command
            .split_first()
            .ok_or(UnifiedExecError::MissingCommandLine)?;
        let spawned =
            codex_utils_pty::spawn_pty_process(program, args, env.cwd.as_path(), &env.env)
                .await
                .map_err(|err| UnifiedExecError::create_session(err.to_string()))?;
        UnifiedExecSession::from_spawned(spawned, env.sandbox).await
    }

    pub(super) async fn open_session_with_sandbox(
        &self,
        command: Vec<String>,
        context: &UnifiedExecContext<'_>,
    ) -> Result<UnifiedExecSession, UnifiedExecError> {
        let mut orchestrator = ToolOrchestrator::new();
        let mut runtime = UnifiedExecRuntime::new(self);
        let req = UnifiedExecToolRequest::new(
            command,
            context.turn.cwd.clone(),
            create_env(&context.turn.shell_environment_policy),
        );
        let tool_ctx = ToolCtx {
            session: context.session,
            sub_id: context.sub_id.to_string(),
            call_id: context.call_id.to_string(),
            tool_name: "unified_exec".to_string(),
        };
        orchestrator
            .run(
                &mut runtime,
                &req,
                &tool_ctx,
                context.turn,
                context.turn.approval_policy,
            )
            .await
            .map_err(|e| UnifiedExecError::create_session(format!("{e:?}")))
    }

    pub(super) async fn collect_output_until_deadline(
        output_buffer: &OutputBuffer,
        output_notify: &Arc<Notify>,
        deadline: Instant,
    ) -> Vec<u8> {
        let mut collected: Vec<u8> = Vec::with_capacity(4096);
        loop {
            let drained_chunks;
            let mut wait_for_output = None;
            {
                let mut guard = output_buffer.lock().await;
                drained_chunks = guard.drain();
                if drained_chunks.is_empty() {
                    wait_for_output = Some(output_notify.notified());
                }
            }

            if drained_chunks.is_empty() {
                let remaining = deadline.saturating_duration_since(Instant::now());
                if remaining == Duration::ZERO {
                    break;
                }

                let notified = wait_for_output.unwrap_or_else(|| output_notify.notified());
                tokio::pin!(notified);
                tokio::select! {
                    _ = &mut notified => {}
                    _ = tokio::time::sleep(remaining) => break,
                }
                continue;
            }

            for chunk in drained_chunks {
                collected.extend_from_slice(&chunk);
            }

            if Instant::now() >= deadline {
                break;
            }
        }

        collected
    }

    pub(super) async fn should_store_session(&self, acquisition: &SessionAcquisition) -> bool {
        if let Some(session) = acquisition.new_session.as_ref() {
            !session.has_exited()
        } else if acquisition.reuse_requested {
            let mut sessions = self.sessions.lock().await;
            if let Some(existing) = sessions.get(&acquisition.session_id) {
                if existing.has_exited() {
                    sessions.remove(&acquisition.session_id);
                    false
                } else {
                    true
                }
            } else {
                false
            }
        } else {
            true
        }
    }

    pub(super) async fn send_input_chunks(
        writer_tx: &mpsc::Sender<Vec<u8>>,
        chunks: &[String],
    ) -> Result<(), UnifiedExecError> {
        let mut trailing_whitespace = true;
        for chunk in chunks {
            if chunk.is_empty() {
                continue;
            }

            let leading_whitespace = chunk
                .chars()
                .next()
                .map(char::is_whitespace)
                .unwrap_or(true);

            if !trailing_whitespace
                && !leading_whitespace
                && writer_tx.send(vec![b' ']).await.is_err()
            {
                return Err(UnifiedExecError::WriteToStdin);
            }

            if writer_tx.send(chunk.as_bytes().to_vec()).await.is_err() {
                return Err(UnifiedExecError::WriteToStdin);
            }

            trailing_whitespace = chunk
                .chars()
                .next_back()
                .map(char::is_whitespace)
                .unwrap_or(trailing_whitespace);
        }

        Ok(())
    }

    pub async fn handle_request(
        &self,
        request: UnifiedExecRequest<'_>,
        context: UnifiedExecContext<'_>,
    ) -> Result<UnifiedExecResult, UnifiedExecError> {
        let (timeout_ms, timeout_warning) = match request.timeout_ms {
            Some(requested) if requested > MAX_TIMEOUT_MS => (
                MAX_TIMEOUT_MS,
                Some(format!(
                    "Warning: requested timeout {requested}ms exceeds maximum of {MAX_TIMEOUT_MS}ms; clamping to {MAX_TIMEOUT_MS}ms.\n"
                )),
            ),
            Some(requested) => (requested, None),
            None => (DEFAULT_TIMEOUT_MS, None),
        };

        let mut acquisition = self.acquire_session(&request, &context).await?;

        if acquisition.reuse_requested {
            Self::send_input_chunks(&acquisition.writer_tx, request.input_chunks).await?;
        }

        let deadline = Instant::now() + Duration::from_millis(timeout_ms);
        let collected = Self::collect_output_until_deadline(
            &acquisition.output_buffer,
            &acquisition.output_notify,
            deadline,
        )
        .await;

        let (output, _maybe_tokens) = truncate_middle(
            &String::from_utf8_lossy(&collected),
            UNIFIED_EXEC_OUTPUT_MAX_BYTES,
        );
        let output = if let Some(warning) = timeout_warning {
            format!("{warning}{output}")
        } else {
            output
        };

        let should_store_session = self.should_store_session(&acquisition).await;
        let session_id = if should_store_session {
            if let Some(session) = acquisition.new_session.take() {
                self.sessions
                    .lock()
                    .await
                    .insert(acquisition.session_id, session);
            }
            Some(acquisition.session_id)
        } else {
            None
        };

        Ok(UnifiedExecResult { session_id, output })
    }
}