Unified execution (#3288)

## Unified PTY-Based Exec Tool

Note: this feature requires the following flag to be set in the config: `use_experimental_unified_exec_tool=true`

- Adds a PTY-backed interactive exec feature (“unified_exec”) with session reuse via session_id, bounded output (128 KiB), and timeout clamping (≤ 60 s).
- Protocol: introduces ResponseItem::UnifiedExec { session_id, arguments, timeout_ms }.
- Tools: exposes unified_exec as a function tool (Responses API); excluded from the Chat Completions payload while still supported in tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with UTF‑8/newline‑aware truncation (truncate_middle).
- Tests: cover command parsing, path resolution, session persistence/cleanup, multi‑session isolation, timeouts, and truncation behavior.
2025-09-10 17:38:11 -07:00
parent 65f3528cad
commit c09ed74a16
13 changed files with 1088 additions and 185 deletions
--- a/codex-rs/core/src/exec_command/exec_command_session.rs
+++ b/codex-rs/core/src/exec_command/exec_command_session.rs
@@ -24,6 +24,9 @@ pub(crate) struct ExecCommandSession {

    /// JoinHandle for the child wait task.
    wait_handle: StdMutex<Option<JoinHandle<()>>>,
+
+    /// Tracks whether the underlying process has exited.
+    exit_status: std::sync::Arc<std::sync::atomic::AtomicBool>,
 }

 impl ExecCommandSession {
@@ -34,6 +37,7 @@ impl ExecCommandSession {
        reader_handle: JoinHandle<()>,
        writer_handle: JoinHandle<()>,
        wait_handle: JoinHandle<()>,
+        exit_status: std::sync::Arc<std::sync::atomic::AtomicBool>,
    ) -> Self {
        Self {
            writer_tx,
@@ -42,6 +46,7 @@ impl ExecCommandSession {
            reader_handle: StdMutex::new(Some(reader_handle)),
            writer_handle: StdMutex::new(Some(writer_handle)),
            wait_handle: StdMutex::new(Some(wait_handle)),
+            exit_status,
        }
    }

@@ -52,6 +57,10 @@ impl ExecCommandSession {
    pub(crate) fn output_receiver(&self) -> broadcast::Receiver<Vec<u8>> {
        self.output_tx.subscribe()
    }
+
+    pub(crate) fn has_exited(&self) -> bool {
+        self.exit_status.load(std::sync::atomic::Ordering::SeqCst)
+    }
 }

 impl Drop for ExecCommandSession {
--- a/codex-rs/core/src/exec_command/mod.rs
+++ b/codex-rs/core/src/exec_command/mod.rs
@@ -6,6 +6,7 @@ mod session_manager;

 pub use exec_command_params::ExecCommandParams;
 pub use exec_command_params::WriteStdinParams;
+pub(crate) use exec_command_session::ExecCommandSession;
 pub use responses_api::EXEC_COMMAND_TOOL_NAME;
 pub use responses_api::WRITE_STDIN_TOOL_NAME;
 pub use responses_api::create_exec_command_tool_for_responses_api;
--- a/codex-rs/core/src/exec_command/session_manager.rs
+++ b/codex-rs/core/src/exec_command/session_manager.rs
@@ -3,6 +3,7 @@ use std::io::ErrorKind;
 use std::io::Read;
 use std::sync::Arc;
 use std::sync::Mutex as StdMutex;
+use std::sync::atomic::AtomicBool;
 use std::sync::atomic::AtomicU32;

 use portable_pty::CommandBuilder;
@@ -19,6 +20,7 @@ use crate::exec_command::exec_command_params::ExecCommandParams;
 use crate::exec_command::exec_command_params::WriteStdinParams;
 use crate::exec_command::exec_command_session::ExecCommandSession;
 use crate::exec_command::session_id::SessionId;
+use crate::truncate::truncate_middle;
 use codex_protocol::models::FunctionCallOutputPayload;

 #[derive(Debug, Default)]
@@ -327,11 +329,14 @@ async fn create_exec_command_session(

    // Keep the child alive until it exits, then signal exit code.
    let (exit_tx, exit_rx) = oneshot::channel::<i32>();
+    let exit_status = Arc::new(AtomicBool::new(false));
+    let wait_exit_status = exit_status.clone();
    let wait_handle = tokio::task::spawn_blocking(move || {
        let code = match child.wait() {
            Ok(status) => status.exit_code() as i32,
            Err(_) => -1,
        };
+        wait_exit_status.store(true, std::sync::atomic::Ordering::SeqCst);
        let _ = exit_tx.send(code);
    });

@@ -343,116 +348,11 @@ async fn create_exec_command_session(
        reader_handle,
        writer_handle,
        wait_handle,
+        exit_status,
    );
    Ok((session, exit_rx))
 }

-/// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes,
-/// preserving the beginning and the end. Returns the possibly truncated
-/// string and `Some(original_token_count)` (estimated at 4 bytes/token)
-/// if truncation occurred; otherwise returns the original string and `None`.
-fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>) {
-    // No truncation needed
-    if s.len() <= max_bytes {
-        return (s.to_string(), None);
-    }
-    let est_tokens = (s.len() as u64).div_ceil(4);
-    if max_bytes == 0 {
-        // Cannot keep any content; still return a full marker (never truncated).
-        return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
-    }
-
-    // Helper to truncate a string to a given byte length on a char boundary.
-    fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
-        if input.len() <= max_len {
-            return input;
-        }
-        let mut end = max_len;
-        while end > 0 && !input.is_char_boundary(end) {
-            end -= 1;
-        }
-        &input[..end]
-    }
-
-    // Given a left/right budget, prefer newline boundaries; otherwise fall back
-    // to UTF-8 char boundaries.
-    fn pick_prefix_end(s: &str, left_budget: usize) -> usize {
-        if let Some(head) = s.get(..left_budget)
-            && let Some(i) = head.rfind('\n')
-        {
-            return i + 1; // keep the newline so suffix starts on a fresh line
-        }
-        truncate_on_boundary(s, left_budget).len()
-    }
-
-    fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
-        let start_tail = s.len().saturating_sub(right_budget);
-        if let Some(tail) = s.get(start_tail..)
-            && let Some(i) = tail.find('\n')
-        {
-            return start_tail + i + 1; // start after newline
-        }
-        // Fall back to a char boundary at or after start_tail.
-        let mut idx = start_tail.min(s.len());
-        while idx < s.len() && !s.is_char_boundary(idx) {
-            idx += 1;
-        }
-        idx
-    }
-
-    // Refine marker length and budgets until stable. Marker is never truncated.
-    let mut guess_tokens = est_tokens; // worst-case: everything truncated
-    for _ in 0..4 {
-        let marker = format!("…{guess_tokens} tokens truncated…");
-        let marker_len = marker.len();
-        let keep_budget = max_bytes.saturating_sub(marker_len);
-        if keep_budget == 0 {
-            // No room for any content within the cap; return a full, untruncated marker
-            // that reflects the entire truncated content.
-            return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
-        }
-
-        let left_budget = keep_budget / 2;
-        let right_budget = keep_budget - left_budget;
-        let prefix_end = pick_prefix_end(s, left_budget);
-        let mut suffix_start = pick_suffix_start(s, right_budget);
-        if suffix_start < prefix_end {
-            suffix_start = prefix_end;
-        }
-        let kept_content_bytes = prefix_end + (s.len() - suffix_start);
-        let truncated_content_bytes = s.len().saturating_sub(kept_content_bytes);
-        let new_tokens = (truncated_content_bytes as u64).div_ceil(4);
-        if new_tokens == guess_tokens {
-            let mut out = String::with_capacity(marker_len + kept_content_bytes + 1);
-            out.push_str(&s[..prefix_end]);
-            out.push_str(&marker);
-            // Place marker on its own line for symmetry when we keep line boundaries.
-            out.push('\n');
-            out.push_str(&s[suffix_start..]);
-            return (out, Some(est_tokens));
-        }
-        guess_tokens = new_tokens;
-    }
-
-    // Fallback: use last guess to build output.
-    let marker = format!("…{guess_tokens} tokens truncated…");
-    let marker_len = marker.len();
-    let keep_budget = max_bytes.saturating_sub(marker_len);
-    if keep_budget == 0 {
-        return (format!("…{est_tokens} tokens truncated…"), Some(est_tokens));
-    }
-    let left_budget = keep_budget / 2;
-    let right_budget = keep_budget - left_budget;
-    let prefix_end = pick_prefix_end(s, left_budget);
-    let suffix_start = pick_suffix_start(s, right_budget);
-    let mut out = String::with_capacity(marker_len + prefix_end + (s.len() - suffix_start) + 1);
-    out.push_str(&s[..prefix_end]);
-    out.push_str(&marker);
-    out.push('\n');
-    out.push_str(&s[suffix_start..]);
-    (out, Some(est_tokens))
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -616,50 +516,4 @@ Output:
 abc"#;
        assert_eq!(expected, text);
    }
-
-    #[test]
-    fn truncate_middle_no_newlines_fallback() {
-        // A long string with no newlines that exceeds the cap.
-        let s = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-        let max_bytes = 16; // force truncation
-        let (out, original) = truncate_middle(s, max_bytes);
-        // For very small caps, we return the full, untruncated marker,
-        // even if it exceeds the cap.
-        assert_eq!(out, "…16 tokens truncated…");
-        // Original string length is 62 bytes => ceil(62/4) = 16 tokens.
-        assert_eq!(original, Some(16));
-    }
-
-    #[test]
-    fn truncate_middle_prefers_newline_boundaries() {
-        // Build a multi-line string of 20 numbered lines (each "NNN\n").
-        let mut s = String::new();
-        for i in 1..=20 {
-            s.push_str(&format!("{i:03}\n"));
-        }
-        // Total length: 20 lines * 4 bytes per line = 80 bytes.
-        assert_eq!(s.len(), 80);
-
-        // Choose a cap that forces truncation while leaving room for
-        // a few lines on each side after accounting for the marker.
-        let max_bytes = 64;
-        // Expect exact output: first 4 lines, marker, last 4 lines, and correct token estimate (80/4 = 20).
-        assert_eq!(
-            truncate_middle(&s, max_bytes),
-            (
-                r#"001
-002
-003
-004
-…12 tokens truncated…
-017
-018
-019
-020
-"#
-                .to_string(),
-                Some(20)
-            )
-        );
-    }
 }