chore: rework tools execution workflow (#5278)

Rework the tool execution flow. Read `orchestrator.rs` to understand the new structure.
2025-10-20 20:57:37 +01:00
parent c84fc83222
commit 5e4f3bbb0b
59 changed files with 2630 additions and 3374 deletions
--- a/codex-rs/core/src/tools/context.rs
+++ b/codex-rs/core/src/tools/context.rs
@@ -232,6 +232,7 @@ mod tests {
 }

 #[derive(Clone, Debug)]
+#[allow(dead_code)]
 pub(crate) struct ExecCommandContext {
    pub(crate) sub_id: String,
    pub(crate) call_id: String,
@@ -243,6 +244,7 @@ pub(crate) struct ExecCommandContext {
 }

 #[derive(Clone, Debug)]
+#[allow(dead_code)]
 pub(crate) struct ApplyPatchCommandContext {
    pub(crate) user_explicitly_approved_this_action: bool,
    pub(crate) changes: HashMap<PathBuf, FileChange>,
--- a/codex-rs/core/src/tools/events.rs
+++ b/codex-rs/core/src/tools/events.rs
@@ -0,0 +1,235 @@
+use crate::codex::Session;
+use crate::exec::ExecToolCallOutput;
+use crate::parse_command::parse_command;
+use crate::protocol::Event;
+use crate::protocol::EventMsg;
+use crate::protocol::ExecCommandBeginEvent;
+use crate::protocol::ExecCommandEndEvent;
+use crate::protocol::FileChange;
+use crate::protocol::PatchApplyBeginEvent;
+use crate::protocol::PatchApplyEndEvent;
+use crate::protocol::TurnDiffEvent;
+use crate::tools::context::SharedTurnDiffTracker;
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::time::Duration;
+
+use super::format_exec_output;
+use super::format_exec_output_str;
+
+#[derive(Clone, Copy)]
+pub(crate) struct ToolEventCtx<'a> {
+    pub session: &'a Session, // events are delivered through `session.send_event`
+    pub sub_id: &'a str, // becomes the `id` of every emitted `Event`
+    pub call_id: &'a str, // copied into each begin/end event payload
+    pub turn_diff_tracker: Option<&'a SharedTurnDiffTracker>, // only read for patch events
+}
+
+impl<'a> ToolEventCtx<'a> {
+    pub fn new( // plain field-by-field constructor
+        session: &'a Session,
+        sub_id: &'a str,
+        call_id: &'a str,
+        turn_diff_tracker: Option<&'a SharedTurnDiffTracker>,
+    ) -> Self {
+        Self {
+            session,
+            sub_id,
+            call_id,
+            turn_diff_tracker,
+        }
+    }
+}
+
+pub(crate) enum ToolEventStage {
+    Begin, // reported as `ExecCommandBegin` / `PatchApplyBegin`
+    Success(ExecToolCallOutput), // reported as an end event built from the output
+    Failure(ToolEventFailure), // also an end event; payload depends on the failure kind
+}
+
+pub(crate) enum ToolEventFailure {
+    Output(ExecToolCallOutput), // failure that still produced exec output
+    Message(String), // failure described only by text; shell end events then use exit code -1
+}
+// Concrete enum emitter with static dispatch: avoids trait objects and boxed futures.
+pub(crate) enum ToolEmitter {
+    Shell {
+        command: Vec<String>, // argv reported (and parsed) in `ExecCommandBegin`
+        cwd: PathBuf, // working directory reported in `ExecCommandBegin`
+    },
+    ApplyPatch {
+        changes: HashMap<PathBuf, FileChange>, // per-path changes sent in `PatchApplyBegin`
+        auto_approved: bool, // forwarded as `PatchApplyBeginEvent::auto_approved`
+    },
+}
+
+impl ToolEmitter {
+    /// Creates an emitter for a shell command executed in `cwd`.
+    pub fn shell(command: Vec<String>, cwd: PathBuf) -> Self {
+        Self::Shell { command, cwd }
+    }
+
+    /// Creates an emitter for a patch touching the paths in `changes`.
+    /// `auto_approved` is forwarded unchanged in the begin event.
+    pub fn apply_patch(changes: HashMap<PathBuf, FileChange>, auto_approved: bool) -> Self {
+        Self::ApplyPatch {
+            changes,
+            auto_approved,
+        }
+    }
+
+    /// Sends the event(s) for `stage` through `ctx.session`.
+    ///
+    /// Shell emitters produce `ExecCommandBegin` / `ExecCommandEnd`. Patch emitters
+    /// produce `PatchApplyBegin` / `PatchApplyEnd`; when `ctx.turn_diff_tracker` is
+    /// set they also notify it on begin, and `emit_patch_end` may follow the end
+    /// event with a `TurnDiff`.
+    pub async fn emit(&self, ctx: ToolEventCtx<'_>, stage: ToolEventStage) {
+        match (self, stage) {
+            (Self::Shell { command, cwd }, ToolEventStage::Begin) => {
+                // The raw argv is reported together with its `parse_command` form.
+                ctx.session
+                    .send_event(Event {
+                        id: ctx.sub_id.to_string(),
+                        msg: EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
+                            call_id: ctx.call_id.to_string(),
+                            command: command.clone(),
+                            cwd: cwd.clone(),
+                            parsed_cmd: parse_command(command),
+                        }),
+                    })
+                    .await;
+            }
+            // Success and failure-with-output carry the same payload, so they share
+            // one arm; the exit code inside `output` is what tells them apart.
+            (
+                Self::Shell { .. },
+                ToolEventStage::Success(output)
+                | ToolEventStage::Failure(ToolEventFailure::Output(output)),
+            ) => {
+                // Format first: the text fields are moved out of `output` below.
+                let formatted_output = format_exec_output_str(&output);
+                emit_exec_end(
+                    ctx,
+                    output.stdout.text,
+                    output.stderr.text,
+                    output.aggregated_output.text,
+                    output.exit_code,
+                    output.duration,
+                    formatted_output,
+                )
+                .await;
+            }
+            (Self::Shell { .. }, ToolEventStage::Failure(ToolEventFailure::Message(message))) => {
+                // No process output exists: the message doubles as stderr and as
+                // the aggregated output, with exit code -1 and zero duration.
+                let formatted_output = format_exec_output(&message);
+                emit_exec_end(
+                    ctx,
+                    String::new(),
+                    message.clone(),
+                    message,
+                    -1,
+                    Duration::ZERO,
+                    formatted_output,
+                )
+                .await;
+            }
+
+            (
+                Self::ApplyPatch {
+                    changes,
+                    auto_approved,
+                },
+                ToolEventStage::Begin,
+            ) => {
+                // Notify the tracker first; the guard is scoped so the lock is
+                // released before the event is sent.
+                if let Some(tracker) = ctx.turn_diff_tracker {
+                    let mut guard = tracker.lock().await;
+                    guard.on_patch_begin(changes);
+                }
+                ctx.session
+                    .send_event(Event {
+                        id: ctx.sub_id.to_string(),
+                        msg: EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
+                            call_id: ctx.call_id.to_string(),
+                            auto_approved: *auto_approved,
+                            changes: changes.clone(),
+                        }),
+                    })
+                    .await;
+            }
+            (
+                Self::ApplyPatch { .. },
+                ToolEventStage::Success(output)
+                | ToolEventStage::Failure(ToolEventFailure::Output(output)),
+            ) => {
+                // `success` follows the exit code, not the stage.
+                let success = output.exit_code == 0;
+                emit_patch_end(ctx, output.stdout.text, output.stderr.text, success).await;
+            }
+            (
+                Self::ApplyPatch { .. },
+                ToolEventStage::Failure(ToolEventFailure::Message(message)),
+            ) => {
+                // No exec output: empty stdout, the message as stderr, not a success.
+                emit_patch_end(ctx, String::new(), message, false).await;
+            }
+        }
+    }
+}
+
+/// Sends an `ExecCommandEnd` event through `ctx.session`. The streams, exit
+/// code, duration and pre-formatted output are forwarded unchanged; `ctx`
+/// supplies the event id (`sub_id`) and the `call_id`.
+async fn emit_exec_end(
+    ctx: ToolEventCtx<'_>,
+    stdout: String,
+    stderr: String,
+    aggregated_output: String,
+    exit_code: i32,
+    duration: Duration,
+    formatted_output: String,
+) {
+    let msg = EventMsg::ExecCommandEnd(ExecCommandEndEvent {
+        call_id: ctx.call_id.to_string(),
+        stdout,
+        stderr,
+        aggregated_output,
+        exit_code,
+        duration,
+        formatted_output,
+    });
+    let id = ctx.sub_id.to_string();
+    ctx.session.send_event(Event { id, msg }).await;
+}
+
+/// Sends a `PatchApplyEnd` event for this call.
+///
+/// `stdout`, `stderr` and `success` are forwarded unchanged. When
+/// `ctx.turn_diff_tracker` is set and `get_unified_diff` returns `Ok(Some(_))`,
+/// a `TurnDiff` event carrying that diff follows.
+async fn emit_patch_end(ctx: ToolEventCtx<'_>, stdout: String, stderr: String, success: bool) {
+    // Report the outcome of the patch itself first.
+    let end_msg = EventMsg::PatchApplyEnd(PatchApplyEndEvent {
+        call_id: ctx.call_id.to_string(),
+        stdout,
+        stderr,
+        success,
+    });
+    let id = ctx.sub_id.to_string();
+    ctx.session.send_event(Event { id, msg: end_msg }).await;
+
+    if let Some(tracker) = ctx.turn_diff_tracker {
+        // The guard is a temporary of this statement, so the lock is released
+        // before the follow-up event is sent.
+        let diff_result = tracker.lock().await.get_unified_diff();
+        // Errors and `None` produce no `TurnDiff` event.
+        if let Ok(Some(unified_diff)) = diff_result {
+            let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
+            let id = ctx.sub_id.to_string();
+            ctx.session.send_event(Event { id, msg }).await;
+        }
+    }
+}
--- a/codex-rs/core/src/tools/handlers/apply_patch.rs
+++ b/codex-rs/core/src/tools/handlers/apply_patch.rs
@@ -8,7 +8,6 @@ use crate::client_common::tools::ResponsesApiTool;
 use crate::client_common::tools::ToolSpec;
 use crate::exec::ExecParams;
 use crate::function_tool::FunctionCallError;
-use crate::openai_tools::JsonSchema;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolOutput;
 use crate::tools::context::ToolPayload;
@@ -16,6 +15,7 @@ use crate::tools::handle_container_exec_with_params;
 use crate::tools::registry::ToolHandler;
 use crate::tools::registry::ToolKind;
 use crate::tools::spec::ApplyPatchToolArgs;
+use crate::tools::spec::JsonSchema;
 use async_trait::async_trait;
 use serde::Deserialize;
 use serde::Serialize;
@@ -72,6 +72,7 @@ impl ToolHandler for ApplyPatchHandler {
            env: HashMap::new(),
            with_escalated_permissions: None,
            justification: None,
+            arg0: None,
        };

        let content = handle_container_exec_with_params(
--- a/codex-rs/core/src/tools/handlers/exec_stream.rs
+++ b/codex-rs/core/src/tools/handlers/exec_stream.rs
@@ -1,68 +0,0 @@
-use async_trait::async_trait;
-
-use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
-use crate::exec_command::ExecCommandParams;
-use crate::exec_command::WRITE_STDIN_TOOL_NAME;
-use crate::exec_command::WriteStdinParams;
-use crate::function_tool::FunctionCallError;
-use crate::tools::context::ToolInvocation;
-use crate::tools::context::ToolOutput;
-use crate::tools::context::ToolPayload;
-use crate::tools::registry::ToolHandler;
-use crate::tools::registry::ToolKind;
-
-pub struct ExecStreamHandler;
-
-#[async_trait]
-impl ToolHandler for ExecStreamHandler {
-    fn kind(&self) -> ToolKind {
-        ToolKind::Function
-    }
-
-    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
-        let ToolInvocation {
-            session,
-            tool_name,
-            payload,
-            ..
-        } = invocation;
-
-        let arguments = match payload {
-            ToolPayload::Function { arguments } => arguments,
-            _ => {
-                return Err(FunctionCallError::RespondToModel(
-                    "exec_stream handler received unsupported payload".to_string(),
-                ));
-            }
-        };
-
-        let content = match tool_name.as_str() {
-            EXEC_COMMAND_TOOL_NAME => {
-                let params: ExecCommandParams = serde_json::from_str(&arguments).map_err(|e| {
-                    FunctionCallError::RespondToModel(format!(
-                        "failed to parse function arguments: {e:?}"
-                    ))
-                })?;
-                session.handle_exec_command_tool(params).await?
-            }
-            WRITE_STDIN_TOOL_NAME => {
-                let params: WriteStdinParams = serde_json::from_str(&arguments).map_err(|e| {
-                    FunctionCallError::RespondToModel(format!(
-                        "failed to parse function arguments: {e:?}"
-                    ))
-                })?;
-                session.handle_write_stdin_tool(params).await?
-            }
-            _ => {
-                return Err(FunctionCallError::RespondToModel(format!(
-                    "exec_stream handler does not support tool {tool_name}"
-                )));
-            }
-        };
-
-        Ok(ToolOutput::Function {
-            content,
-            success: Some(true),
-        })
-    }
-}
--- a/codex-rs/core/src/tools/handlers/mod.rs
+++ b/codex-rs/core/src/tools/handlers/mod.rs
@@ -1,5 +1,4 @@
 pub mod apply_patch;
-mod exec_stream;
 mod grep_files;
 mod list_dir;
 mod mcp;
@@ -14,7 +13,6 @@ mod view_image;
 pub use plan::PLAN_TOOL;

 pub use apply_patch::ApplyPatchHandler;
-pub use exec_stream::ExecStreamHandler;
 pub use grep_files::GrepFilesHandler;
 pub use list_dir::ListDirHandler;
 pub use mcp::McpHandler;
--- a/codex-rs/core/src/tools/handlers/plan.rs
+++ b/codex-rs/core/src/tools/handlers/plan.rs
@@ -2,12 +2,12 @@ use crate::client_common::tools::ResponsesApiTool;
 use crate::client_common::tools::ToolSpec;
 use crate::codex::Session;
 use crate::function_tool::FunctionCallError;
-use crate::openai_tools::JsonSchema;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolOutput;
 use crate::tools::context::ToolPayload;
 use crate::tools::registry::ToolHandler;
 use crate::tools::registry::ToolKind;
+use crate::tools::spec::JsonSchema;
 use async_trait::async_trait;
 use codex_protocol::plan_tool::UpdatePlanArgs;
 use codex_protocol::protocol::Event;
--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -24,6 +24,7 @@ impl ShellHandler {
            env: create_env(&turn_context.shell_environment_policy),
            with_escalated_permissions: params.with_escalated_permissions,
            justification: params.justification,
+            arg0: None,
        }
    }
 }
--- a/codex-rs/core/src/tools/handlers/unified_exec.rs
+++ b/codex-rs/core/src/tools/handlers/unified_exec.rs
@@ -35,7 +35,13 @@ impl ToolHandler for UnifiedExecHandler {

    async fn handle(&self, invocation: ToolInvocation) -> Result<ToolOutput, FunctionCallError> {
        let ToolInvocation {
-            session, payload, ..
+            session,
+            turn,
+            sub_id,
+            call_id,
+            tool_name: _tool_name,
+            payload,
+            ..
        } = invocation;

        let args = match payload {
@@ -73,13 +79,23 @@ impl ToolHandler for UnifiedExecHandler {
        };

        let request = UnifiedExecRequest {
-            session_id: parsed_session_id,
            input_chunks: &input,
            timeout_ms,
        };

        let value = session
-            .run_unified_exec_request(request)
+            .services
+            .unified_exec_manager
+            .handle_request(
+                request,
+                crate::unified_exec::UnifiedExecContext {
+                    session: &session,
+                    turn: turn.as_ref(),
+                    sub_id: &sub_id,
+                    call_id: &call_id,
+                    session_id: parsed_session_id,
+                },
+            )
            .await
            .map_err(|err| {
                FunctionCallError::RespondToModel(format!("unified exec failed: {err:?}"))
--- a/codex-rs/core/src/tools/mod.rs
+++ b/codex-rs/core/src/tools/mod.rs
@@ -1,12 +1,15 @@
 pub mod context;
+pub mod events;
 pub(crate) mod handlers;
+pub mod orchestrator;
 pub mod parallel;
 pub mod registry;
 pub mod router;
+pub mod runtimes;
+pub mod sandboxing;
 pub mod spec;

 use crate::apply_patch;
-use crate::apply_patch::ApplyPatchExec;
 use crate::apply_patch::InternalApplyPatchInvocation;
 use crate::apply_patch::convert_apply_patch_to_protocol;
 use crate::codex::Session;
@@ -15,14 +18,19 @@ use crate::error::CodexErr;
 use crate::error::SandboxErr;
 use crate::exec::ExecParams;
 use crate::exec::ExecToolCallOutput;
-use crate::exec::StdoutStream;
-use crate::executor::ExecutionMode;
-use crate::executor::errors::ExecError;
-use crate::executor::linkers::PreparedExec;
 use crate::function_tool::FunctionCallError;
-use crate::tools::context::ApplyPatchCommandContext;
-use crate::tools::context::ExecCommandContext;
 use crate::tools::context::SharedTurnDiffTracker;
+use crate::tools::events::ToolEmitter;
+use crate::tools::events::ToolEventCtx;
+use crate::tools::events::ToolEventFailure;
+use crate::tools::events::ToolEventStage;
+use crate::tools::orchestrator::ToolOrchestrator;
+use crate::tools::runtimes::apply_patch::ApplyPatchRequest;
+use crate::tools::runtimes::apply_patch::ApplyPatchRuntime;
+use crate::tools::runtimes::shell::ShellRequest;
+use crate::tools::runtimes::shell::ShellRuntime;
+use crate::tools::sandboxing::ToolCtx;
+use crate::tools::sandboxing::ToolError;
 use codex_apply_patch::MaybeApplyPatchVerified;
 use codex_apply_patch::maybe_parse_apply_patch_verified;
 use codex_protocol::protocol::AskForApproval;
@@ -56,7 +64,7 @@ pub(crate) async fn handle_container_exec_with_params(
    sub_id: String,
    call_id: String,
 ) -> Result<String, FunctionCallError> {
-    let otel_event_manager = turn_context.client.get_otel_event_manager();
+    let _otel_event_manager = turn_context.client.get_otel_event_manager();

    if params.with_escalated_permissions.unwrap_or(false)
        && !matches!(turn_context.approval_policy, AskForApproval::OnRequest)
@@ -100,86 +108,142 @@ pub(crate) async fn handle_container_exec_with_params(
        MaybeApplyPatchVerified::NotApplyPatch => None,
    };

-    let command_for_display = if let Some(exec) = apply_patch_exec.as_ref() {
-        vec!["apply_patch".to_string(), exec.action.patch.clone()]
-    } else {
-        params.command.clone()
-    };
-
-    let exec_command_context = ExecCommandContext {
-        sub_id: sub_id.clone(),
-        call_id: call_id.clone(),
-        command_for_display: command_for_display.clone(),
-        cwd: params.cwd.clone(),
-        apply_patch: apply_patch_exec.as_ref().map(
-            |ApplyPatchExec {
-                 action,
-                 user_explicitly_approved_this_action,
-             }| ApplyPatchCommandContext {
-                user_explicitly_approved_this_action: *user_explicitly_approved_this_action,
-                changes: convert_apply_patch_to_protocol(action),
-            },
+    let (event_emitter, diff_opt) = match apply_patch_exec.as_ref() {
+        Some(exec) => (
+            ToolEmitter::apply_patch(
+                convert_apply_patch_to_protocol(&exec.action),
+                !exec.user_explicitly_approved_this_action,
+            ),
+            Some(&turn_diff_tracker),
+        ),
+        None => (
+            ToolEmitter::shell(params.command.clone(), params.cwd.clone()),
+            None,
        ),
-        tool_name: tool_name.to_string(),
-        otel_event_manager,
    };

-    let mode = match apply_patch_exec {
-        Some(exec) => ExecutionMode::ApplyPatch(exec),
-        None => ExecutionMode::Shell,
-    };
+    let event_ctx = ToolEventCtx::new(sess.as_ref(), &sub_id, &call_id, diff_opt);
+    event_emitter.emit(event_ctx, ToolEventStage::Begin).await;

-    sess.services.executor.update_environment(
-        turn_context.sandbox_policy.clone(),
-        turn_context.cwd.clone(),
-    );
+    // Build runtime contexts only when needed (shell/apply_patch below).

-    let prepared_exec = PreparedExec::new(
-        exec_command_context,
-        params,
-        command_for_display,
-        mode,
-        Some(StdoutStream {
+    if let Some(exec) = apply_patch_exec {
+        // Route apply_patch execution through the new orchestrator/runtime.
+        let req = ApplyPatchRequest {
+            patch: exec.action.patch.clone(),
+            cwd: params.cwd.clone(),
+            timeout_ms: params.timeout_ms,
+            user_explicitly_approved: exec.user_explicitly_approved_this_action,
+            codex_exe: turn_context.codex_linux_sandbox_exe.clone(),
+        };
+
+        let mut orchestrator = ToolOrchestrator::new();
+        let mut runtime = ApplyPatchRuntime::new();
+        let tool_ctx = ToolCtx {
+            session: sess.as_ref(),
            sub_id: sub_id.clone(),
            call_id: call_id.clone(),
-            tx_event: sess.get_tx_event(),
-        }),
-        turn_context.shell_environment_policy.use_profile,
-    );
+            tool_name: tool_name.to_string(),
+        };

-    let output_result = sess
-        .run_exec_with_events(
-            turn_diff_tracker.clone(),
-            prepared_exec,
-            turn_context.approval_policy,
-        )
-        .await;
+        let out = orchestrator
+            .run(
+                &mut runtime,
+                &req,
+                &tool_ctx,
+                &turn_context,
+                turn_context.approval_policy,
+            )
+            .await;

-    // always make sure to truncate the output if its length isn't controlled.
-    match output_result {
+        handle_exec_outcome(&event_emitter, event_ctx, out).await
+    } else {
+        // Route shell execution through the new orchestrator/runtime.
+        let req = ShellRequest {
+            command: params.command.clone(),
+            cwd: params.cwd.clone(),
+            timeout_ms: params.timeout_ms,
+            env: params.env.clone(),
+            with_escalated_permissions: params.with_escalated_permissions,
+            justification: params.justification.clone(),
+        };
+
+        let mut orchestrator = ToolOrchestrator::new();
+        let mut runtime = ShellRuntime::new();
+        let tool_ctx = ToolCtx {
+            session: sess.as_ref(),
+            sub_id: sub_id.clone(),
+            call_id: call_id.clone(),
+            tool_name: tool_name.to_string(),
+        };
+
+        let out = orchestrator
+            .run(
+                &mut runtime,
+                &req,
+                &tool_ctx,
+                &turn_context,
+                turn_context.approval_policy,
+            )
+            .await;
+
+        handle_exec_outcome(&event_emitter, event_ctx, out).await
+    }
+}
+
+async fn handle_exec_outcome(
+    event_emitter: &ToolEmitter,
+    event_ctx: ToolEventCtx<'_>,
+    out: Result<ExecToolCallOutput, ToolError>,
+) -> Result<String, FunctionCallError> {
+    let event; // assigned by every arm below, emitted once after the match
+    let result = match out {
         Ok(output) => {
-            let ExecToolCallOutput { exit_code, .. } = &output;
-            let content = format_exec_output_apply_patch(&output);
-            if *exit_code == 0 {
+            let content = format_exec_output_for_model(&output);
+            let exit_code = output.exit_code;
+            event = ToolEventStage::Success(output); // even when exit_code != 0
+            if exit_code == 0 {
                 Ok(content)
             } else {
                 Err(FunctionCallError::RespondToModel(content))
             }
         }
-        Err(ExecError::Function(err)) => Err(truncate_function_error(err)),
-        Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) => Err(
-            FunctionCallError::RespondToModel(format_exec_output_apply_patch(&output)),
-        ),
-        Err(ExecError::Codex(err)) => {
+        Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
+        | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
+            let response = format_exec_output_for_model(&output);
+            event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
+            Err(FunctionCallError::RespondToModel(response))
+        }
+        Err(ToolError::Codex(err)) => {
             let message = format!("execution error: {err:?}");
+            let response = format_exec_output(&message);
+            event = ToolEventStage::Failure(ToolEventFailure::Message(message));
-            Err(FunctionCallError::RespondToModel(format_exec_output(
-                &message,
-            )))
+            // `response` is already formatted; running `format_exec_output` on it
+            // a second time could truncate the text again.
+            Err(FunctionCallError::RespondToModel(response))
         }
-    }
+        Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => {
+            // Normalize common rejection messages for exec tools so tests and
+            // users see a clear, consistent phrase.
+            let normalized = if msg == "rejected by user" {
+                "exec command rejected by user".to_string()
+            } else {
+                msg
+            };
+            let response = format_exec_output(&normalized);
+            event = ToolEventStage::Failure(ToolEventFailure::Message(normalized));
+            // `response` is already formatted; running `format_exec_output` on it
+            // a second time could truncate the text again.
+            Err(FunctionCallError::RespondToModel(response))
+        }
+    };
+    event_emitter.emit(event_ctx, event).await;
+    result
 }

-pub fn format_exec_output_apply_patch(exec_output: &ExecToolCallOutput) -> String {
+/// Format the combined exec output for sending back to the model.
+/// Includes exit code and duration metadata; truncates large bodies safely.
+pub fn format_exec_output_for_model(exec_output: &ExecToolCallOutput) -> String {
    let ExecToolCallOutput {
        exit_code,
        duration,
@@ -233,18 +297,7 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
    format_exec_output(content)
 }

-fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
-    match err {
-        FunctionCallError::RespondToModel(msg) => {
-            FunctionCallError::RespondToModel(format_exec_output(&msg))
-        }
-        FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
-        FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
-        other => other,
-    }
-}
-
-fn format_exec_output(content: &str) -> String {
+pub(super) fn format_exec_output(content: &str) -> String {
    // Head+tail truncation for the model: show the beginning and end with an elision.
    // Clients still receive full streams; only this formatted summary is capped.
    let total_lines = content.lines().count();
@@ -315,6 +368,17 @@ mod tests {
    use super::*;
    use regex_lite::Regex;

+    fn truncate_function_error(err: FunctionCallError) -> FunctionCallError {
+        match err { // re-wrap each listed variant with its text run through `format_exec_output`
+            FunctionCallError::RespondToModel(msg) => {
+                FunctionCallError::RespondToModel(format_exec_output(&msg))
+            }
+            FunctionCallError::Denied(msg) => FunctionCallError::Denied(format_exec_output(&msg)),
+            FunctionCallError::Fatal(msg) => FunctionCallError::Fatal(format_exec_output(&msg)),
+            other => other, // every other variant is returned unchanged
+        }
+    }
+
    fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
        let pattern = truncated_message_pattern(line, total_lines);
        let regex = Regex::new(&pattern).unwrap_or_else(|err| {
--- a/codex-rs/core/src/tools/orchestrator.rs
+++ b/codex-rs/core/src/tools/orchestrator.rs
@@ -0,0 +1,172 @@
+/*
+Module: orchestrator
+
+Central place for approvals + sandbox selection + retry semantics. Drives a
+simple sequence for any ToolRuntime: approval → select sandbox → attempt → on
+sandbox denial, retry without sandbox (re-approval may be answered from cache).
+*/
+use crate::error::CodexErr;
+use crate::error::SandboxErr;
+use crate::exec::ExecToolCallOutput;
+use crate::sandboxing::SandboxManager;
+use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::ToolCtx;
+use crate::tools::sandboxing::ToolError;
+use crate::tools::sandboxing::ToolRuntime;
+use codex_protocol::protocol::AskForApproval;
+use codex_protocol::protocol::ReviewDecision;
+
+pub(crate) struct ToolOrchestrator {
+    sandbox: SandboxManager, // selects the initial sandbox; lent to every `SandboxAttempt`
+}
+
+impl ToolOrchestrator {
+    pub fn new() -> Self {
+        Self {
+            sandbox: SandboxManager::new(),
+        }
+    }
+
+    pub async fn run<Rq, Out, T>(
+        &mut self,
+        tool: &mut T,
+        req: &Rq,
+        tool_ctx: &ToolCtx<'_>,
+        turn_ctx: &crate::codex::TurnContext,
+        approval_policy: AskForApproval,
+    ) -> Result<Out, ToolError>
+    where
+        T: ToolRuntime<Rq, Out>,
+    {
+        let otel = turn_ctx.client.get_otel_event_manager(); // records every decision below
+        let otel_tn = &tool_ctx.tool_name;
+        let otel_ci = &tool_ctx.call_id;
+        let otel_user = codex_otel::otel_event_manager::ToolDecisionSource::User;
+        let otel_cfg = codex_otel::otel_event_manager::ToolDecisionSource::Config;
+
+        // 1) Approval: ask up front only when the tool wants it for this policy.
+        let needs_initial_approval =
+            tool.wants_initial_approval(req, approval_policy, &turn_ctx.sandbox_policy);
+        let mut already_approved = false; // passed to `should_bypass_approval` before a retry
+
+        if needs_initial_approval {
+            let approval_ctx = ApprovalCtx {
+                session: tool_ctx.session,
+                sub_id: &tool_ctx.sub_id,
+                call_id: &tool_ctx.call_id,
+                retry_reason: None,
+            };
+            let decision = tool.start_approval_async(req, approval_ctx).await;
+
+            otel.tool_decision(otel_tn, otel_ci, decision, otel_user.clone());
+
+            match decision {
+                ReviewDecision::Denied | ReviewDecision::Abort => {
+                    return Err(ToolError::Rejected("rejected by user".to_string()));
+                }
+                ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
+            }
+            already_approved = true;
+        } else {
+            otel.tool_decision(otel_tn, otel_ci, ReviewDecision::Approved, otel_cfg); // no prompt
+        }
+
+        // 2) First attempt under the selected sandbox (none if the tool asks to escalate).
+        let mut initial_sandbox = self
+            .sandbox
+            .select_initial(&turn_ctx.sandbox_policy, tool.sandbox_preference());
+        if tool.wants_escalated_first_attempt(req) {
+            initial_sandbox = crate::exec::SandboxType::None;
+        }
+        let initial_attempt = SandboxAttempt {
+            sandbox: initial_sandbox,
+            policy: &turn_ctx.sandbox_policy,
+            manager: &self.sandbox,
+            sandbox_cwd: &turn_ctx.cwd,
+            codex_linux_sandbox_exe: turn_ctx.codex_linux_sandbox_exe.as_ref(),
+        };
+
+        match tool.run(req, &initial_attempt, tool_ctx).await {
+            Ok(out) => {
+                // We have a successful initial result
+                Ok(out)
+            }
+            Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
+                if !tool.escalate_on_failure() {
+                    return Err(ToolError::SandboxDenied(
+                        "sandbox denied and no retry".to_string(),
+                    ));
+                }
+                // Under `Never`, do not retry without sandbox; surface a concise message
+                // derived from the actual output (platform-agnostic).
+                if matches!(approval_policy, AskForApproval::Never) {
+                    let msg = build_never_denied_message_from_output(output.as_ref());
+                    return Err(ToolError::SandboxDenied(msg));
+                }
+
+                // Ask for approval before retrying, unless the tool says it can be bypassed.
+                if !tool.should_bypass_approval(approval_policy, already_approved) {
+                    let reason_msg = build_denial_reason_from_output(output.as_ref());
+                    let approval_ctx = ApprovalCtx {
+                        session: tool_ctx.session,
+                        sub_id: &tool_ctx.sub_id,
+                        call_id: &tool_ctx.call_id,
+                        retry_reason: Some(reason_msg),
+                    };
+
+                    let decision = tool.start_approval_async(req, approval_ctx).await;
+                    otel.tool_decision(otel_tn, otel_ci, decision, otel_user);
+
+                    match decision {
+                        ReviewDecision::Denied | ReviewDecision::Abort => {
+                            return Err(ToolError::Rejected("rejected by user".to_string()));
+                        }
+                        ReviewDecision::Approved | ReviewDecision::ApprovedForSession => {}
+                    }
+                }
+
+                let escalated_attempt = SandboxAttempt {
+                    sandbox: crate::exec::SandboxType::None,
+                    policy: &turn_ctx.sandbox_policy,
+                    manager: &self.sandbox,
+                    sandbox_cwd: &turn_ctx.cwd,
+                    codex_linux_sandbox_exe: None,
+                };
+
+                // Second attempt: its result, success or error, is returned as-is.
+                (*tool).run(req, &escalated_attempt, tool_ctx).await
+            }
+            other => other, // any other error is returned without a retry
+        }
+    }
+}
+
+/// Builds the `AskForApproval::Never` denial message: "failed in sandbox", plus the
+/// first known denial phrase found (case-insensitively) in the command's output.
+fn build_never_denied_message_from_output(output: &ExecToolCallOutput) -> String {
+    // Lowercase needle → tag appended to the message, in priority order.
+    const KNOWN_DENIALS: [(&str, &str); 3] = [
+        ("permission denied", "Permission denied"),
+        ("operation not permitted", "Operation not permitted"),
+        ("read-only file system", "Read-only file system"),
+    ];
+    let haystack = format!(
+        "{}\n{}\n{}",
+        output.stderr.text, output.stdout.text, output.aggregated_output.text
+    )
+    .to_lowercase();
+    KNOWN_DENIALS
+        .iter()
+        .find(|(needle, _)| haystack.contains(*needle))
+        .map_or_else(
+            || "failed in sandbox".to_string(),
+            |(_, tag)| format!("failed in sandbox: {tag}"),
+        )
+}
+
+fn build_denial_reason_from_output(_output: &ExecToolCallOutput) -> String {
+    // The prompt text is fixed (terse and stable for UX/tests); `_output` is
+    // accepted so later heuristics can use it without touching call sites.
+    String::from("command failed; retry without sandbox?")
+}
--- a/codex-rs/core/src/tools/runtimes/apply_patch.rs
+++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs
@@ -0,0 +1,148 @@
+//! Apply Patch runtime: executes verified patches under the orchestrator.
+//!
+//! Assumes `apply_patch` verification/approval happened upstream. Reuses that
+//! decision to avoid re-prompting, builds the self-invocation command for
+//! `codex --codex-run-as-apply-patch`, and runs under the current
+//! `SandboxAttempt` with a minimal environment.
+use crate::CODEX_APPLY_PATCH_ARG1;
+use crate::exec::ExecToolCallOutput;
+use crate::sandboxing::CommandSpec;
+use crate::sandboxing::execute_env;
+use crate::tools::sandboxing::Approvable;
+use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::Sandboxable;
+use crate::tools::sandboxing::SandboxablePreference;
+use crate::tools::sandboxing::ToolCtx;
+use crate::tools::sandboxing::ToolError;
+use crate::tools::sandboxing::ToolRuntime;
+use crate::tools::sandboxing::with_cached_approval;
+use codex_protocol::protocol::ReviewDecision;
+use futures::future::BoxFuture;
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+/// Parameters for one `apply_patch` execution.
+#[derive(Clone, Debug)]
+pub struct ApplyPatchRequest {
+    /// Patch text; passed as the argument following `CODEX_APPLY_PATCH_ARG1`.
+    pub patch: String,
+    /// Working directory for the spawned command; also part of the approval key.
+    pub cwd: PathBuf,
+    /// Optional timeout in milliseconds, forwarded to the `CommandSpec`.
+    pub timeout_ms: Option<u64>,
+    /// When true (and this is not a retry), approval resolves to
+    /// `ReviewDecision::ApprovedForSession` without prompting.
+    pub user_explicitly_approved: bool,
+    /// Executable to invoke; falls back to `std::env::current_exe()` when `None`.
+    pub codex_exe: Option<PathBuf>,
+}
+
+/// Field-less runtime type that executes `ApplyPatchRequest`s; behavior is
+/// provided by its `Sandboxable`, `Approvable` and `ToolRuntime` impls.
+#[derive(Default)]
+pub struct ApplyPatchRuntime;
+
+/// Key under which an apply-patch approval is cached: the patch text together
+/// with the working directory it applies to.
+#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
+pub(crate) struct ApprovalKey {
+    // Full patch text, cloned from the request.
+    patch: String,
+    // Working directory, cloned from the request.
+    cwd: PathBuf,
+}
+
+impl ApplyPatchRuntime {
+    /// Creates the runtime; it carries no state.
+    pub fn new() -> Self {
+        Self
+    }
+
+    /// Builds the `CommandSpec` that re-invokes the codex executable in
+    /// apply-patch mode.
+    ///
+    /// The program is `req.codex_exe` when provided, otherwise the path reported
+    /// by `std::env::current_exe()`. The arguments are `CODEX_APPLY_PATCH_ARG1`
+    /// followed by the patch text.
+    ///
+    /// # Errors
+    /// Returns `ToolError::Rejected` when the current executable cannot be
+    /// determined.
+    fn build_command_spec(req: &ApplyPatchRequest) -> Result<CommandSpec, ToolError> {
+        let exe = match &req.codex_exe {
+            Some(path) => path.clone(),
+            None => std::env::current_exe()
+                .map_err(|e| ToolError::Rejected(format!("failed to determine codex exe: {e}")))?,
+        };
+        // `program` is a `String`, so a non-UTF-8 path is converted lossily here.
+        // `into_owned` reuses the buffer when `to_string_lossy` already had to
+        // allocate, instead of copying the text a second time.
+        let program = exe.to_string_lossy().into_owned();
+        Ok(CommandSpec {
+            program,
+            args: vec![CODEX_APPLY_PATCH_ARG1.to_string(), req.patch.clone()],
+            cwd: req.cwd.clone(),
+            timeout_ms: req.timeout_ms,
+            // Run apply_patch with a minimal environment for determinism and to avoid leaks.
+            env: HashMap::new(),
+            with_escalated_permissions: None,
+            justification: None,
+        })
+    }
+
+    /// Builds the `StdoutStream` passed to `execute_env`, carrying the
+    /// submission id, the call id and the session's `tx_event` handle.
+    /// Always returns `Some`.
+    fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
+        Some(crate::exec::StdoutStream {
+            sub_id: ctx.sub_id.clone(),
+            call_id: ctx.call_id.clone(),
+            tx_event: ctx.session.get_tx_event(),
+        })
+    }
+}
+
+// Sandbox configuration for apply_patch: no specific preference (`Auto`) and
+// escalation after a failure is enabled.
+impl Sandboxable for ApplyPatchRuntime {
+    fn sandbox_preference(&self) -> SandboxablePreference {
+        SandboxablePreference::Auto
+    }
+    // Same value as the trait's default implementation, spelled out explicitly.
+    fn escalate_on_failure(&self) -> bool {
+        true
+    }
+}
+
+impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
+    type ApprovalKey = ApprovalKey;
+
+    /// The cache key is the patch text plus the working directory.
+    fn approval_key(&self, req: &ApplyPatchRequest) -> Self::ApprovalKey {
+        let patch = req.patch.clone();
+        let cwd = req.cwd.clone();
+        ApprovalKey { patch, cwd }
+    }
+
+    /// Resolves the approval decision for this patch, consulting the session's
+    /// approval cache first.
+    ///
+    /// On a cache miss:
+    /// - with a retry reason, the user is prompted via `request_command_approval`
+    ///   (the command is shown as `apply_patch`);
+    /// - otherwise no prompt is issued and the result is `ApprovedForSession`
+    ///   when the user explicitly approved, or `Approved` if not.
+    fn start_approval_async<'a>(
+        &'a mut self,
+        req: &'a ApplyPatchRequest,
+        ctx: ApprovalCtx<'a>,
+    ) -> BoxFuture<'a, ReviewDecision> {
+        // Everything the future needs is copied out up front so it owns its data.
+        let cache_key = self.approval_key(req);
+        let explicitly_approved = req.user_explicitly_approved;
+        let patch_cwd = req.cwd.clone();
+        let session = ctx.session;
+        let submission = ctx.sub_id.to_string();
+        let call = ctx.call_id.to_string();
+        let retry = ctx.retry_reason.clone();
+
+        Box::pin(async move {
+            let decide = || async move {
+                match retry {
+                    Some(reason) => {
+                        session
+                            .request_command_approval(
+                                submission,
+                                call,
+                                vec!["apply_patch".to_string()],
+                                patch_cwd,
+                                Some(reason),
+                            )
+                            .await
+                    }
+                    None if explicitly_approved => ReviewDecision::ApprovedForSession,
+                    None => ReviewDecision::Approved,
+                }
+            };
+            with_cached_approval(&session.services, cache_key, decide).await
+        })
+    }
+}
+
+impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
+    /// Runs the patch under the given sandbox attempt.
+    ///
+    /// Builds the self-invocation command, transforms it into an execution
+    /// environment for `attempt`, and executes it while streaming stdout.
+    /// Transform and execution failures are reported as `ToolError::Codex`.
+    async fn run(
+        &mut self,
+        req: &ApplyPatchRequest,
+        attempt: &SandboxAttempt<'_>,
+        ctx: &ToolCtx<'_>,
+    ) -> Result<ExecToolCallOutput, ToolError> {
+        let spec = Self::build_command_spec(req)?;
+        let exec_env = match attempt.env_for(&spec) {
+            Ok(prepared) => prepared,
+            Err(err) => return Err(ToolError::Codex(err.into())),
+        };
+        let stream = Self::stdout_stream(ctx);
+        execute_env(&exec_env, attempt.policy, stream)
+            .await
+            .map_err(ToolError::Codex)
+    }
+}
--- a/codex-rs/core/src/tools/runtimes/mod.rs
+++ b/codex-rs/core/src/tools/runtimes/mod.rs
@@ -0,0 +1,38 @@
+/*
+Module: runtimes
+
+Concrete ToolRuntime implementations for specific tools. Each runtime stays
+small and focused and reuses the orchestrator for approvals + sandbox + retry.
+*/
+use crate::sandboxing::CommandSpec;
+use crate::tools::sandboxing::ToolError;
+use std::collections::HashMap;
+use std::path::Path;
+
+pub mod apply_patch;
+pub mod shell;
+pub mod unified_exec;
+
+/// Builds a `CommandSpec` from a tokenized command line.
+///
+/// The first token becomes the program and the remaining tokens its
+/// arguments; `cwd` and `env` are copied into the spec and the other
+/// parameters are stored unchanged.
+///
+/// # Errors
+/// Returns `ToolError::Rejected` when `command` is empty.
+pub(crate) fn build_command_spec(
+    command: &[String],
+    cwd: &Path,
+    env: &HashMap<String, String>,
+    timeout_ms: Option<u64>,
+    with_escalated_permissions: Option<bool>,
+    justification: Option<String>,
+) -> Result<CommandSpec, ToolError> {
+    match command {
+        [] => Err(ToolError::Rejected("command args are empty".to_string())),
+        [program, rest @ ..] => Ok(CommandSpec {
+            program: program.clone(),
+            args: rest.to_vec(),
+            cwd: cwd.to_path_buf(),
+            env: env.clone(),
+            timeout_ms,
+            with_escalated_permissions,
+            justification,
+        }),
+    }
+}
--- a/codex-rs/core/src/tools/runtimes/shell.rs
+++ b/codex-rs/core/src/tools/runtimes/shell.rs
@@ -0,0 +1,164 @@
+/*
+Runtime: shell
+
+Executes shell requests under the orchestrator: asks for approval when needed,
+builds a CommandSpec, and runs it under the current SandboxAttempt.
+*/
+use crate::command_safety::is_dangerous_command::command_might_be_dangerous;
+use crate::command_safety::is_safe_command::is_known_safe_command;
+use crate::exec::ExecToolCallOutput;
+use crate::protocol::SandboxPolicy;
+use crate::sandboxing::execute_env;
+use crate::tools::runtimes::build_command_spec;
+use crate::tools::sandboxing::Approvable;
+use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::Sandboxable;
+use crate::tools::sandboxing::SandboxablePreference;
+use crate::tools::sandboxing::ToolCtx;
+use crate::tools::sandboxing::ToolError;
+use crate::tools::sandboxing::ToolRuntime;
+use crate::tools::sandboxing::with_cached_approval;
+use codex_protocol::protocol::AskForApproval;
+use codex_protocol::protocol::ReviewDecision;
+use futures::future::BoxFuture;
+use std::path::PathBuf;
+
+/// Parameters for a single shell tool invocation.
+#[derive(Clone, Debug)]
+pub struct ShellRequest {
+    /// Tokenized command line; the first element is used as the program.
+    pub command: Vec<String>,
+    /// Working directory for the command; also part of the approval key.
+    pub cwd: PathBuf,
+    /// Optional timeout in milliseconds, forwarded to the `CommandSpec`.
+    pub timeout_ms: Option<u64>,
+    /// Environment variables copied into the `CommandSpec`.
+    pub env: std::collections::HashMap<String, String>,
+    /// `Some(true)` requests an escalated (unsandboxed) first attempt; `None`
+    /// is treated as `false`.
+    pub with_escalated_permissions: Option<bool>,
+    /// Optional reason text; used as the approval reason when no retry reason
+    /// is set.
+    pub justification: Option<String>,
+}
+
+/// Field-less runtime type that executes `ShellRequest`s; behavior is provided
+/// by its `Sandboxable`, `Approvable` and `ToolRuntime` impls.
+#[derive(Default)]
+pub struct ShellRuntime;
+
+/// Key under which a shell approval is cached: the exact command tokens, the
+/// working directory, and whether escalated permissions were requested.
+#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
+pub(crate) struct ApprovalKey {
+    // Tokenized command line, cloned from the request.
+    command: Vec<String>,
+    // Working directory, cloned from the request.
+    cwd: PathBuf,
+    // `with_escalated_permissions`, with `None` mapped to `false`.
+    escalated: bool,
+}
+
+impl ShellRuntime {
+    /// Creates the runtime; it carries no state.
+    pub fn new() -> Self {
+        ShellRuntime
+    }
+
+    /// Builds the `StdoutStream` passed to `execute_env`, carrying the
+    /// submission id, the call id and the session's `tx_event` handle.
+    /// Always returns `Some`.
+    fn stdout_stream(ctx: &ToolCtx<'_>) -> Option<crate::exec::StdoutStream> {
+        let stream = crate::exec::StdoutStream {
+            sub_id: ctx.sub_id.clone(),
+            call_id: ctx.call_id.clone(),
+            tx_event: ctx.session.get_tx_event(),
+        };
+        Some(stream)
+    }
+}
+
+// Sandbox configuration for shell commands: no specific preference (`Auto`)
+// and escalation after a failure is enabled.
+impl Sandboxable for ShellRuntime {
+    fn sandbox_preference(&self) -> SandboxablePreference {
+        SandboxablePreference::Auto
+    }
+    // Same value as the trait's default implementation, spelled out explicitly.
+    fn escalate_on_failure(&self) -> bool {
+        true
+    }
+}
+
+impl Approvable<ShellRequest> for ShellRuntime {
+    type ApprovalKey = ApprovalKey;
+
+    /// The cache key is the command tokens, the working directory and whether
+    /// escalated permissions were requested (`None` counts as `false`).
+    fn approval_key(&self, req: &ShellRequest) -> Self::ApprovalKey {
+        ApprovalKey {
+            command: req.command.clone(),
+            cwd: req.cwd.clone(),
+            escalated: req.with_escalated_permissions.unwrap_or(false),
+        }
+    }
+
+    /// Resolves the approval decision for this command, consulting the
+    /// session's approval cache first and prompting the user on a miss.
+    ///
+    /// The reason shown is the retry reason when present, otherwise the
+    /// request's own justification.
+    fn start_approval_async<'a>(
+        &'a mut self,
+        req: &'a ShellRequest,
+        ctx: ApprovalCtx<'a>,
+    ) -> BoxFuture<'a, ReviewDecision> {
+        let key = self.approval_key(req);
+        let command = req.command.clone();
+        let cwd = req.cwd.clone();
+        let reason = ctx
+            .retry_reason
+            .clone()
+            .or_else(|| req.justification.clone());
+        let session = ctx.session;
+        let sub_id = ctx.sub_id.to_string();
+        let call_id = ctx.call_id.to_string();
+        Box::pin(async move {
+            with_cached_approval(&session.services, key, || async move {
+                session
+                    .request_command_approval(sub_id, call_id, command, cwd, reason)
+                    .await
+            })
+            .await
+        })
+    }
+
+    /// Decides whether to ask the user before the first attempt.
+    ///
+    /// Known-safe commands are never prompted for. Otherwise:
+    /// - `Never` / `OnFailure`: no prompt;
+    /// - `OnRequest`: prompt when the command looks dangerous, or — outside
+    ///   `DangerFullAccess` — when escalated permissions are requested;
+    /// - `UnlessTrusted`: always prompt.
+    fn wants_initial_approval(
+        &self,
+        req: &ShellRequest,
+        policy: AskForApproval,
+        sandbox_policy: &SandboxPolicy,
+    ) -> bool {
+        if is_known_safe_command(&req.command) {
+            return false;
+        }
+        match policy {
+            AskForApproval::Never | AskForApproval::OnFailure => false,
+            AskForApproval::OnRequest => {
+                // In DangerFullAccess, only prompt if the command looks dangerous.
+                if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) {
+                    return command_might_be_dangerous(&req.command);
+                }
+
+                // In restricted sandboxes (ReadOnly/WorkspaceWrite), do not prompt for
+                // non-escalated, non-dangerous commands; let the sandbox enforce
+                // restrictions (e.g., block network/write) without a user prompt.
+                let wants_escalation = req.with_escalated_permissions.unwrap_or(false);
+                if wants_escalation {
+                    return true;
+                }
+                command_might_be_dangerous(&req.command)
+            }
+            // Known-safe commands already returned above, so every command that
+            // reaches this arm is untrusted; no need to re-run the safety check.
+            AskForApproval::UnlessTrusted => true,
+        }
+    }
+
+    /// An explicit request for escalated permissions asks for an escalated
+    /// first attempt.
+    fn wants_escalated_first_attempt(&self, req: &ShellRequest) -> bool {
+        req.with_escalated_permissions.unwrap_or(false)
+    }
+}
+
+impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
+    /// Runs the shell command under the given sandbox attempt.
+    ///
+    /// Builds a `CommandSpec` from the request, transforms it into an
+    /// execution environment for `attempt`, and executes it while streaming
+    /// stdout. An empty command is rejected by `build_command_spec`; transform
+    /// and execution failures are reported as `ToolError::Codex`.
+    async fn run(
+        &mut self,
+        req: &ShellRequest,
+        attempt: &SandboxAttempt<'_>,
+        ctx: &ToolCtx<'_>,
+    ) -> Result<ExecToolCallOutput, ToolError> {
+        let justification = req.justification.clone();
+        let spec = build_command_spec(
+            &req.command,
+            &req.cwd,
+            &req.env,
+            req.timeout_ms,
+            req.with_escalated_permissions,
+            justification,
+        )?;
+        let exec_env = match attempt.env_for(&spec) {
+            Ok(prepared) => prepared,
+            Err(err) => return Err(ToolError::Codex(err.into())),
+        };
+        let stream = Self::stdout_stream(ctx);
+        execute_env(&exec_env, attempt.policy, stream)
+            .await
+            .map_err(ToolError::Codex)
+    }
+}
--- a/codex-rs/core/src/tools/runtimes/unified_exec.rs
+++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs
@@ -0,0 +1,123 @@
+/*
+Runtime: unified exec
+
+Handles approval + sandbox orchestration for unified exec requests, delegating to
+the session manager to spawn PTYs once an ExecEnv is prepared.
+*/
+use crate::error::CodexErr;
+use crate::error::SandboxErr;
+use crate::tools::runtimes::build_command_spec;
+use crate::tools::sandboxing::Approvable;
+use crate::tools::sandboxing::ApprovalCtx;
+use crate::tools::sandboxing::SandboxAttempt;
+use crate::tools::sandboxing::Sandboxable;
+use crate::tools::sandboxing::SandboxablePreference;
+use crate::tools::sandboxing::ToolCtx;
+use crate::tools::sandboxing::ToolError;
+use crate::tools::sandboxing::ToolRuntime;
+use crate::tools::sandboxing::with_cached_approval;
+use crate::unified_exec::UnifiedExecError;
+use crate::unified_exec::UnifiedExecSession;
+use crate::unified_exec::UnifiedExecSessionManager;
+use codex_protocol::protocol::ReviewDecision;
+use futures::future::BoxFuture;
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+/// Parameters for opening one unified-exec session.
+#[derive(Clone, Debug)]
+pub struct UnifiedExecRequest {
+    /// Tokenized command line; the first element is used as the program.
+    pub command: Vec<String>,
+    /// Working directory for the command; also part of the approval key.
+    pub cwd: PathBuf,
+    /// Environment variables copied into the `CommandSpec`.
+    pub env: HashMap<String, String>,
+}
+
+/// Key under which a unified-exec approval is cached: the exact command tokens
+/// plus the working directory.
+#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
+pub struct UnifiedExecApprovalKey {
+    /// Tokenized command line, cloned from the request.
+    pub command: Vec<String>,
+    /// Working directory, cloned from the request.
+    pub cwd: PathBuf,
+}
+
+/// Runtime for unified-exec requests; borrows the session manager that opens
+/// the session once an execution environment has been prepared.
+pub struct UnifiedExecRuntime<'a> {
+    // Used by `run` to call `open_session_with_exec_env`.
+    manager: &'a UnifiedExecSessionManager,
+}
+
+impl UnifiedExecRequest {
+    /// Creates a request from its three fields, stored as given.
+    pub fn new(command: Vec<String>, cwd: PathBuf, env: HashMap<String, String>) -> Self {
+        Self { command, cwd, env }
+    }
+}
+
+impl<'a> UnifiedExecRuntime<'a> {
+    /// Creates a runtime that opens sessions through `manager`.
+    pub fn new(manager: &'a UnifiedExecSessionManager) -> Self {
+        Self { manager }
+    }
+}
+
+// Sandbox configuration for unified exec: no specific preference (`Auto`) and
+// escalation after a failure is enabled.
+impl Sandboxable for UnifiedExecRuntime<'_> {
+    fn sandbox_preference(&self) -> SandboxablePreference {
+        SandboxablePreference::Auto
+    }
+
+    // Same value as the trait's default implementation, spelled out explicitly.
+    fn escalate_on_failure(&self) -> bool {
+        true
+    }
+}
+
+impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
+    type ApprovalKey = UnifiedExecApprovalKey;
+
+    /// The cache key is the command tokens plus the working directory.
+    fn approval_key(&self, req: &UnifiedExecRequest) -> Self::ApprovalKey {
+        let command = req.command.clone();
+        let cwd = req.cwd.clone();
+        UnifiedExecApprovalKey { command, cwd }
+    }
+
+    /// Resolves the approval decision for this command, consulting the
+    /// session's approval cache first and prompting the user on a miss.
+    /// The retry reason, if any, is passed along as the prompt's reason.
+    fn start_approval_async<'b>(
+        &'b mut self,
+        req: &'b UnifiedExecRequest,
+        ctx: ApprovalCtx<'b>,
+    ) -> BoxFuture<'b, ReviewDecision> {
+        // Everything the future needs is copied out up front so it owns its data.
+        let cache_key = self.approval_key(req);
+        let argv = req.command.clone();
+        let workdir = req.cwd.clone();
+        let session = ctx.session;
+        let submission = ctx.sub_id.to_string();
+        let call = ctx.call_id.to_string();
+        let retry = ctx.retry_reason.clone();
+
+        Box::pin(async move {
+            let ask_user = || async move {
+                session
+                    .request_command_approval(submission, call, argv, workdir, retry)
+                    .await
+            };
+            with_cached_approval(&session.services, cache_key, ask_user).await
+        })
+    }
+}
+
+impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRuntime<'a> {
+    /// Opens a unified-exec session for the request under the given sandbox
+    /// attempt.
+    ///
+    /// Error mapping:
+    /// - an empty command line becomes `ToolError::Rejected`;
+    /// - a sandbox transform failure becomes `ToolError::Codex`;
+    /// - `UnifiedExecError::SandboxDenied` becomes a `CodexErr::Sandbox` denial
+    ///   carrying the captured output;
+    /// - any other session error becomes `ToolError::Rejected` with its message.
+    async fn run(
+        &mut self,
+        req: &UnifiedExecRequest,
+        attempt: &SandboxAttempt<'_>,
+        _ctx: &ToolCtx<'_>,
+    ) -> Result<UnifiedExecSession, ToolError> {
+        let spec = match build_command_spec(&req.command, &req.cwd, &req.env, None, None, None) {
+            Ok(spec) => spec,
+            Err(_) => {
+                return Err(ToolError::Rejected(
+                    "missing command line for PTY".to_string(),
+                ));
+            }
+        };
+        let exec_env = match attempt.env_for(&spec) {
+            Ok(prepared) => prepared,
+            Err(err) => return Err(ToolError::Codex(err.into())),
+        };
+        match self.manager.open_session_with_exec_env(&exec_env).await {
+            Ok(session) => Ok(session),
+            Err(UnifiedExecError::SandboxDenied { output, .. }) => {
+                Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied {
+                    output: Box::new(output),
+                })))
+            }
+            Err(other) => Err(ToolError::Rejected(other.to_string())),
+        }
+    }
+}
--- a/codex-rs/core/src/tools/sandboxing.rs
+++ b/codex-rs/core/src/tools/sandboxing.rs
@@ -0,0 +1,190 @@
+//! Shared approvals and sandboxing traits used by tool runtimes.
+//!
+//! Consolidates the approval flow primitives (`ApprovalStore`,
+//! `with_cached_approval`, `ApprovalCtx`, `Approvable`) together with the sandbox
+//! orchestration traits and helpers (`Sandboxable`, `ToolRuntime`, `SandboxAttempt`, etc.).
+
+use crate::codex::Session;
+use crate::error::CodexErr;
+use crate::protocol::SandboxPolicy;
+use crate::sandboxing::CommandSpec;
+use crate::sandboxing::SandboxManager;
+use crate::sandboxing::SandboxTransformError;
+use crate::state::SessionServices;
+use codex_protocol::protocol::AskForApproval;
+use codex_protocol::protocol::ReviewDecision;
+use std::collections::HashMap;
+use std::fmt::Debug;
+use std::hash::Hash;
+use std::path::Path;
+
+use futures::Future;
+use futures::future::BoxFuture;
+use serde::Serialize;
+
+/// Cache of review decisions, keyed by the JSON serialization of an
+/// arbitrary `Serialize` key so different key types can share one map.
+#[derive(Clone, Default, Debug)]
+pub(crate) struct ApprovalStore {
+    // Store serialized keys for generic caching across requests.
+    map: HashMap<String, ReviewDecision>,
+}
+
+impl ApprovalStore {
+    /// Looks up the decision stored for `key`.
+    ///
+    /// Returns `None` when nothing is stored under the key's JSON form or
+    /// when the key cannot be serialized.
+    pub fn get<K>(&self, key: &K) -> Option<ReviewDecision>
+    where
+        K: Serialize,
+    {
+        serde_json::to_string(key)
+            .ok()
+            .and_then(|serialized| self.map.get(&serialized).cloned())
+    }
+
+    /// Stores `value` under the JSON form of `key`, replacing any previous
+    /// entry. A key that fails to serialize is ignored.
+    pub fn put<K>(&mut self, key: K, value: ReviewDecision)
+    where
+        K: Serialize,
+    {
+        match serde_json::to_string(&key) {
+            Ok(serialized) => {
+                self.map.insert(serialized, value);
+            }
+            // Best effort: an unserializable key simply is not cached.
+            Err(_) => {}
+        }
+    }
+}
+
+/// Returns the cached decision for `key`, or runs `fetch` to obtain one.
+///
+/// Only `ReviewDecision::ApprovedForSession` results are written back to the
+/// cache; every other decision is returned without being stored. The approvals
+/// lock is released before `fetch` is awaited and re-acquired only to store.
+pub(crate) async fn with_cached_approval<K, F, Fut>(
+    services: &SessionServices,
+    key: K,
+    fetch: F,
+) -> ReviewDecision
+where
+    K: Serialize + Clone,
+    F: FnOnce() -> Fut,
+    Fut: Future<Output = ReviewDecision>,
+{
+    // The guard is a temporary, so the lock is dropped at the end of this statement.
+    let cached = services.tool_approvals.lock().await.get(&key);
+    if let Some(decision) = cached {
+        return decision;
+    }
+
+    let decision = fetch().await;
+
+    if let ReviewDecision::ApprovedForSession = decision {
+        services
+            .tool_approvals
+            .lock()
+            .await
+            .put(key, ReviewDecision::ApprovedForSession);
+    }
+
+    decision
+}
+
+/// Context handed to `Approvable::start_approval_async`.
+#[derive(Clone)]
+pub(crate) struct ApprovalCtx<'a> {
+    /// Session used to reach the approval cache and to prompt the user.
+    pub session: &'a Session,
+    /// Submission id passed along with the approval request.
+    pub sub_id: &'a str,
+    /// Tool call id passed along with the approval request.
+    pub call_id: &'a str,
+    /// Set when approval is being requested for a retry; runtimes pass it on
+    /// as the reason shown with the prompt.
+    pub retry_reason: Option<String>,
+}
+
+/// Approval behavior of a tool runtime for requests of type `Req`.
+pub(crate) trait Approvable<Req> {
+    /// Key type used to cache approvals for this tool.
+    type ApprovalKey: Hash + Eq + Clone + Debug + Serialize;
+
+    /// Derives the approval cache key for `req`.
+    fn approval_key(&self, req: &Req) -> Self::ApprovalKey;
+
+    /// Some tools may request to skip the sandbox on the first attempt
+    /// (e.g., when the request explicitly asks for escalated permissions).
+    /// Defaults to `false`.
+    fn wants_escalated_first_attempt(&self, _req: &Req) -> bool {
+        false
+    }
+
+    /// Returns `true` when no (further) approval prompt is needed: either the
+    /// request was already approved, or the policy is `Never`.
+    fn should_bypass_approval(&self, policy: AskForApproval, already_approved: bool) -> bool {
+        // An existing approval short-circuits: we do not ask one more time.
+        already_approved || matches!(policy, AskForApproval::Never)
+    }
+
+    /// Decide whether an initial user approval should be requested before the
+    /// first attempt. Defaults to the orchestrator's behavior (pre-refactor):
+    /// - Never, OnFailure: do not ask
+    /// - OnRequest: ask unless sandbox policy is DangerFullAccess
+    /// - UnlessTrusted: always ask
+    fn wants_initial_approval(
+        &self,
+        _req: &Req,
+        policy: AskForApproval,
+        sandbox_policy: &SandboxPolicy,
+    ) -> bool {
+        match policy {
+            AskForApproval::UnlessTrusted => true,
+            AskForApproval::OnRequest => {
+                let full_access = matches!(sandbox_policy, SandboxPolicy::DangerFullAccess);
+                !full_access
+            }
+            AskForApproval::Never | AskForApproval::OnFailure => false,
+        }
+    }
+
+    /// Starts the approval flow for `req` and returns a future resolving to
+    /// the review decision.
+    fn start_approval_async<'a>(
+        &'a mut self,
+        req: &'a Req,
+        ctx: ApprovalCtx<'a>,
+    ) -> BoxFuture<'a, ReviewDecision>;
+}
+
+/// A tool's stated preference about running inside a sandbox.
+// NOTE(review): how each variant is interpreted is decided by the orchestrator,
+// which is not visible here; the names suggest "let the orchestrator choose",
+// "must be sandboxed" and "must not be sandboxed" respectively. Confirm there.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub(crate) enum SandboxablePreference {
+    Auto,
+    #[allow(dead_code)] // Will be used by later tools.
+    Require,
+    #[allow(dead_code)] // Will be used by later tools.
+    Forbid,
+}
+
+/// Sandbox-related configuration exposed by a tool runtime.
+pub(crate) trait Sandboxable {
+    /// The runtime's sandbox preference.
+    fn sandbox_preference(&self) -> SandboxablePreference;
+    /// Whether escalation should be attempted after a failure. Defaults to `true`.
+    // NOTE(review): the retry itself is implemented by the orchestrator; confirm
+    // the exact conditions there.
+    fn escalate_on_failure(&self) -> bool {
+        true
+    }
+}
+
+/// Per-call context handed to `ToolRuntime::run`.
+pub(crate) struct ToolCtx<'a> {
+    /// Session the tool call belongs to.
+    pub session: &'a Session,
+    /// Submission id of the call.
+    pub sub_id: String,
+    /// Id of this tool call.
+    pub call_id: String,
+    /// Name of the tool being run.
+    pub tool_name: String,
+}
+
+/// Errors a tool runtime or the orchestrator can report.
+#[derive(Debug)]
+pub(crate) enum ToolError {
+    /// The request was refused (e.g. rejected by the user, or an empty
+    /// command line); carries a human-readable message.
+    Rejected(String),
+    /// Carries a message describing a sandbox denial.
+    // NOTE(review): not constructed by the runtimes in this change set; confirm
+    // where the orchestrator produces it.
+    SandboxDenied(String),
+    /// Wraps an underlying `CodexErr` (sandbox transform or execution failure).
+    Codex(CodexErr),
+}
+
+/// A tool that can execute requests of type `Req` into outputs of type `Out`,
+/// and that also describes its approval and sandbox behavior.
+pub(crate) trait ToolRuntime<Req, Out>: Approvable<Req> + Sandboxable {
+    /// Executes `req` once under the sandbox configuration in `attempt`.
+    async fn run(
+        &mut self,
+        req: &Req,
+        attempt: &SandboxAttempt<'_>,
+        ctx: &ToolCtx,
+    ) -> Result<Out, ToolError>;
+}
+
+/// Sandbox configuration for one execution attempt; all fields are passed to
+/// `SandboxManager::transform` by `env_for`.
+pub(crate) struct SandboxAttempt<'a> {
+    /// Sandbox type to use for this attempt (`SandboxType::None` for an
+    /// escalated attempt).
+    pub sandbox: crate::exec::SandboxType,
+    /// Sandbox policy in effect.
+    pub policy: &'a crate::protocol::SandboxPolicy,
+    // Manager that turns a `CommandSpec` into an `ExecEnv`.
+    pub(crate) manager: &'a SandboxManager,
+    // Working directory handed to the sandbox transform.
+    pub(crate) sandbox_cwd: &'a Path,
+    /// Optional path to the Linux sandbox executable.
+    pub codex_linux_sandbox_exe: Option<&'a std::path::PathBuf>,
+}
+
+impl<'a> SandboxAttempt<'a> {
+    /// Transforms `spec` into an execution environment using this attempt's
+    /// sandbox type, policy, working directory and optional Linux sandbox
+    /// executable.
+    ///
+    /// # Errors
+    /// Propagates any `SandboxTransformError` from the sandbox manager.
+    pub fn env_for(
+        &self,
+        spec: &CommandSpec,
+    ) -> Result<crate::sandboxing::ExecEnv, SandboxTransformError> {
+        let Self {
+            sandbox,
+            policy,
+            manager,
+            sandbox_cwd,
+            codex_linux_sandbox_exe,
+        } = self;
+        // Destructuring `&self` yields references to the fields; dereference to
+        // pass the same values the fields hold.
+        manager.transform(
+            spec,
+            *policy,
+            *sandbox,
+            *sandbox_cwd,
+            *codex_linux_sandbox_exe,
+        )
+    }
+}
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -815,12 +815,7 @@ pub(crate) fn build_specs(
    config: &ToolsConfig,
    mcp_tools: Option<HashMap<String, mcp_types::Tool>>,
 ) -> ToolRegistryBuilder {
-    use crate::exec_command::EXEC_COMMAND_TOOL_NAME;
-    use crate::exec_command::WRITE_STDIN_TOOL_NAME;
-    use crate::exec_command::create_exec_command_tool_for_responses_api;
-    use crate::exec_command::create_write_stdin_tool_for_responses_api;
    use crate::tools::handlers::ApplyPatchHandler;
-    use crate::tools::handlers::ExecStreamHandler;
    use crate::tools::handlers::GrepFilesHandler;
    use crate::tools::handlers::ListDirHandler;
    use crate::tools::handlers::McpHandler;
@@ -836,7 +831,6 @@ pub(crate) fn build_specs(
    let mut builder = ToolRegistryBuilder::new();

    let shell_handler = Arc::new(ShellHandler);
-    let exec_stream_handler = Arc::new(ExecStreamHandler);
    let unified_exec_handler = Arc::new(UnifiedExecHandler);
    let plan_handler = Arc::new(PlanHandler);
    let apply_patch_handler = Arc::new(ApplyPatchHandler);
@@ -844,7 +838,10 @@ pub(crate) fn build_specs(
    let mcp_handler = Arc::new(McpHandler);
    let mcp_resource_handler = Arc::new(McpResourceHandler);

-    if config.experimental_unified_exec_tool {
+    let use_unified_exec = config.experimental_unified_exec_tool
+        || matches!(config.shell_type, ConfigShellToolType::Streamable);
+
+    if use_unified_exec {
        builder.push_spec(create_unified_exec_tool());
        builder.register_handler("unified_exec", unified_exec_handler);
    } else {
@@ -856,14 +853,7 @@ pub(crate) fn build_specs(
                builder.push_spec(ToolSpec::LocalShell {});
            }
            ConfigShellToolType::Streamable => {
-                builder.push_spec(ToolSpec::Function(
-                    create_exec_command_tool_for_responses_api(),
-                ));
-                builder.push_spec(ToolSpec::Function(
-                    create_write_stdin_tool_for_responses_api(),
-                ));
-                builder.register_handler(EXEC_COMMAND_TOOL_NAME, exec_stream_handler.clone());
-                builder.register_handler(WRITE_STDIN_TOOL_NAME, exec_stream_handler);
+                // Already handled by use_unified_exec.
            }
        }
    }