Unified execution (#3288)

## Unified PTY-Based Exec Tool Note: this requires to have this flag in the config: `use_experimental_unified_exec_tool=true` - Adds a PTY-backed interactive exec feature (“unified_exec”) with session reuse via session_id, bounded output (128 KiB), and timeout clamping (≤ 60 s). - Protocol: introduces ResponseItem::UnifiedExec { session_id, arguments, timeout_ms }. - Tools: exposes unified_exec as a function tool (Responses API); excluded from Chat Completions payload while still supported in tool lists. - Path handling: resolves commands via PATH (or explicit paths), with UTF‑8/newline‑aware truncation (truncate_middle). - Tests: cover command parsing, path resolution, session persistence/cleanup, multi‑session isolation, timeouts, and truncation behavior.
2025-09-10 17:38:11 -07:00
parent 65f3528cad
commit c09ed74a16
13 changed files with 1088 additions and 185 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -26,6 +26,7 @@ use codex_protocol::protocol::TurnAbortReason;
 use codex_protocol::protocol::TurnAbortedEvent;
 use futures::prelude::*;
 use mcp_types::CallToolResult;
+use serde::Deserialize;
 use serde::Serialize;
 use serde_json;
 use tokio::sync::oneshot;
@@ -112,6 +113,7 @@ use crate::safety::assess_command_safety;
 use crate::safety::assess_safety_for_untrusted_command;
 use crate::shell;
 use crate::turn_diff_tracker::TurnDiffTracker;
+use crate::unified_exec::UnifiedExecSessionManager;
 use crate::user_instructions::UserInstructions;
 use crate::user_notification::UserNotification;
 use crate::util::backoff;
@@ -280,6 +282,7 @@ pub(crate) struct Session {
    /// Manager for external MCP servers/tools.
    mcp_connection_manager: McpConnectionManager,
    session_manager: ExecSessionManager,
+    unified_exec_manager: UnifiedExecSessionManager,

    /// External notifier command (will be passed as args to exec()). When
    /// `None` this feature is disabled.
@@ -471,6 +474,7 @@ impl Session {
                include_web_search_request: config.tools_web_search_request,
                use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
                include_view_image_tool: config.include_view_image_tool,
+                experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
            }),
            user_instructions,
            base_instructions,
@@ -484,6 +488,7 @@ impl Session {
            tx_event: tx_event.clone(),
            mcp_connection_manager,
            session_manager: ExecSessionManager::default(),
+            unified_exec_manager: UnifiedExecSessionManager::default(),
            notify,
            state: Mutex::new(state),
            rollout: Mutex::new(Some(rollout_recorder)),
@@ -1149,6 +1154,7 @@ async fn submission_loop(
                    include_web_search_request: config.tools_web_search_request,
                    use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
                    include_view_image_tool: config.include_view_image_tool,
+                    experimental_unified_exec_tool: config.use_experimental_unified_exec_tool,
                });

                let new_turn_context = TurnContext {
@@ -1251,6 +1257,8 @@ async fn submission_loop(
                            use_streamable_shell_tool: config
                                .use_experimental_streamable_shell_tool,
                            include_view_image_tool: config.include_view_image_tool,
+                            experimental_unified_exec_tool: config
+                                .use_experimental_unified_exec_tool,
                        }),
                        user_instructions: turn_context.user_instructions.clone(),
                        base_instructions: turn_context.base_instructions.clone(),
@@ -2082,6 +2090,72 @@ async fn handle_response_item(
    Ok(output)
 }

+async fn handle_unified_exec_tool_call(
+    sess: &Session,
+    call_id: String,
+    session_id: Option<String>,
+    arguments: Vec<String>,
+    timeout_ms: Option<u64>,
+) -> ResponseInputItem {
+    let parsed_session_id = if let Some(session_id) = session_id {
+        match session_id.parse::<i32>() {
+            Ok(parsed) => Some(parsed),
+            Err(output) => {
+                return ResponseInputItem::FunctionCallOutput {
+                    call_id: call_id.to_string(),
+                    output: FunctionCallOutputPayload {
+                        content: format!("invalid session_id: {session_id} due to error {output}"),
+                        success: Some(false),
+                    },
+                };
+            }
+        }
+    } else {
+        None
+    };
+
+    let request = crate::unified_exec::UnifiedExecRequest {
+        session_id: parsed_session_id,
+        input_chunks: &arguments,
+        timeout_ms,
+    };
+
+    let result = sess.unified_exec_manager.handle_request(request).await;
+
+    let output_payload = match result {
+        Ok(value) => {
+            #[derive(Serialize)]
+            struct SerializedUnifiedExecResult<'a> {
+                session_id: Option<String>,
+                output: &'a str,
+            }
+
+            match serde_json::to_string(&SerializedUnifiedExecResult {
+                session_id: value.session_id.map(|id| id.to_string()),
+                output: &value.output,
+            }) {
+                Ok(serialized) => FunctionCallOutputPayload {
+                    content: serialized,
+                    success: Some(true),
+                },
+                Err(err) => FunctionCallOutputPayload {
+                    content: format!("failed to serialize unified exec output: {err}"),
+                    success: Some(false),
+                },
+            }
+        }
+        Err(err) => FunctionCallOutputPayload {
+            content: format!("unified exec failed: {err}"),
+            success: Some(false),
+        },
+    };
+
+    ResponseInputItem::FunctionCallOutput {
+        call_id,
+        output: output_payload,
+    }
+}
+
 async fn handle_function_call(
    sess: &Session,
    turn_context: &TurnContext,
@@ -2109,6 +2183,38 @@ async fn handle_function_call(
            )
            .await
        }
+        "unified_exec" => {
+            #[derive(Deserialize)]
+            struct UnifiedExecArgs {
+                input: Vec<String>,
+                #[serde(default)]
+                session_id: Option<String>,
+                #[serde(default)]
+                timeout_ms: Option<u64>,
+            }
+
+            let args = match serde_json::from_str::<UnifiedExecArgs>(&arguments) {
+                Ok(args) => args,
+                Err(err) => {
+                    return ResponseInputItem::FunctionCallOutput {
+                        call_id,
+                        output: FunctionCallOutputPayload {
+                            content: format!("failed to parse function arguments: {err}"),
+                            success: Some(false),
+                        },
+                    };
+                }
+            };
+
+            handle_unified_exec_tool_call(
+                sess,
+                call_id,
+                args.session_id,
+                args.input,
+                args.timeout_ms,
+            )
+            .await
+        }
        "view_image" => {
            #[derive(serde::Deserialize)]
            struct SeeImageArgs {