Unified execution (#3288)

## Unified PTY-Based Exec Tool

Note: this requires to have this flag in the config:
`use_experimental_unified_exec_tool=true`

- Adds a PTY-backed interactive exec feature (“unified_exec”) with
session reuse via
  session_id, bounded output (128 KiB), and timeout clamping (≤ 60 s).
- Protocol: introduces ResponseItem::UnifiedExec { session_id,
arguments, timeout_ms }.
- Tools: exposes unified_exec as a function tool (Responses API);
excluded from Chat
  Completions payload while still supported in tool lists.
- Path handling: resolves commands via PATH (or explicit paths), with
UTF‑8/newline‑aware
  truncation (truncate_middle).
- Tests: cover command parsing, path resolution, session
persistence/cleanup, multi‑session
  isolation, timeouts, and truncation behavior.
This commit is contained in:
jif-oai
2025-09-10 17:38:11 -07:00
committed by GitHub
parent 65f3528cad
commit c09ed74a16
13 changed files with 1088 additions and 185 deletions

View File

@@ -24,6 +24,9 @@ pub(crate) struct ExecCommandSession {
/// JoinHandle for the child wait task.
wait_handle: StdMutex<Option<JoinHandle<()>>>,
/// Tracks whether the underlying process has exited.
exit_status: std::sync::Arc<std::sync::atomic::AtomicBool>,
}
impl ExecCommandSession {
@@ -34,6 +37,7 @@ impl ExecCommandSession {
reader_handle: JoinHandle<()>,
writer_handle: JoinHandle<()>,
wait_handle: JoinHandle<()>,
exit_status: std::sync::Arc<std::sync::atomic::AtomicBool>,
) -> Self {
Self {
writer_tx,
@@ -42,6 +46,7 @@ impl ExecCommandSession {
reader_handle: StdMutex::new(Some(reader_handle)),
writer_handle: StdMutex::new(Some(writer_handle)),
wait_handle: StdMutex::new(Some(wait_handle)),
exit_status,
}
}
@@ -52,6 +57,10 @@ impl ExecCommandSession {
pub(crate) fn output_receiver(&self) -> broadcast::Receiver<Vec<u8>> {
self.output_tx.subscribe()
}
pub(crate) fn has_exited(&self) -> bool {
self.exit_status.load(std::sync::atomic::Ordering::SeqCst)
}
}
impl Drop for ExecCommandSession {

View File

@@ -6,6 +6,7 @@ mod session_manager;
pub use exec_command_params::ExecCommandParams;
pub use exec_command_params::WriteStdinParams;
pub(crate) use exec_command_session::ExecCommandSession;
pub use responses_api::EXEC_COMMAND_TOOL_NAME;
pub use responses_api::WRITE_STDIN_TOOL_NAME;
pub use responses_api::create_exec_command_tool_for_responses_api;

View File

@@ -3,6 +3,7 @@ use std::io::ErrorKind;
use std::io::Read;
use std::sync::Arc;
use std::sync::Mutex as StdMutex;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU32;
use portable_pty::CommandBuilder;
@@ -19,6 +20,7 @@ use crate::exec_command::exec_command_params::ExecCommandParams;
use crate::exec_command::exec_command_params::WriteStdinParams;
use crate::exec_command::exec_command_session::ExecCommandSession;
use crate::exec_command::session_id::SessionId;
use crate::truncate::truncate_middle;
use codex_protocol::models::FunctionCallOutputPayload;
#[derive(Debug, Default)]
@@ -327,11 +329,14 @@ async fn create_exec_command_session(
// Keep the child alive until it exits, then signal exit code.
let (exit_tx, exit_rx) = oneshot::channel::<i32>();
let exit_status = Arc::new(AtomicBool::new(false));
let wait_exit_status = exit_status.clone();
let wait_handle = tokio::task::spawn_blocking(move || {
let code = match child.wait() {
Ok(status) => status.exit_code() as i32,
Err(_) => -1,
};
wait_exit_status.store(true, std::sync::atomic::Ordering::SeqCst);
let _ = exit_tx.send(code);
});
@@ -343,116 +348,11 @@ async fn create_exec_command_session(
reader_handle,
writer_handle,
wait_handle,
exit_status,
);
Ok((session, exit_rx))
}
/// Truncate the middle of a UTF-8 string to at most `max_bytes` bytes,
/// preserving the beginning and the end. Returns the possibly truncated
/// string and `Some(original_token_count)` (estimated at 4 bytes/token)
/// if truncation occurred; otherwise returns the original string and `None`.
fn truncate_middle(s: &str, max_bytes: usize) -> (String, Option<u64>) {
// No truncation needed
if s.len() <= max_bytes {
return (s.to_string(), None);
}
let est_tokens = (s.len() as u64).div_ceil(4);
if max_bytes == 0 {
// Cannot keep any content; still return a full marker (never truncated).
return (format!("{est_tokens} tokens truncated…"), Some(est_tokens));
}
// Helper to truncate a string to a given byte length on a char boundary.
fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
if input.len() <= max_len {
return input;
}
let mut end = max_len;
while end > 0 && !input.is_char_boundary(end) {
end -= 1;
}
&input[..end]
}
// Given a left/right budget, prefer newline boundaries; otherwise fall back
// to UTF-8 char boundaries.
fn pick_prefix_end(s: &str, left_budget: usize) -> usize {
if let Some(head) = s.get(..left_budget)
&& let Some(i) = head.rfind('\n')
{
return i + 1; // keep the newline so suffix starts on a fresh line
}
truncate_on_boundary(s, left_budget).len()
}
fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
let start_tail = s.len().saturating_sub(right_budget);
if let Some(tail) = s.get(start_tail..)
&& let Some(i) = tail.find('\n')
{
return start_tail + i + 1; // start after newline
}
// Fall back to a char boundary at or after start_tail.
let mut idx = start_tail.min(s.len());
while idx < s.len() && !s.is_char_boundary(idx) {
idx += 1;
}
idx
}
// Refine marker length and budgets until stable. Marker is never truncated.
let mut guess_tokens = est_tokens; // worst-case: everything truncated
for _ in 0..4 {
let marker = format!("{guess_tokens} tokens truncated…");
let marker_len = marker.len();
let keep_budget = max_bytes.saturating_sub(marker_len);
if keep_budget == 0 {
// No room for any content within the cap; return a full, untruncated marker
// that reflects the entire truncated content.
return (format!("{est_tokens} tokens truncated…"), Some(est_tokens));
}
let left_budget = keep_budget / 2;
let right_budget = keep_budget - left_budget;
let prefix_end = pick_prefix_end(s, left_budget);
let mut suffix_start = pick_suffix_start(s, right_budget);
if suffix_start < prefix_end {
suffix_start = prefix_end;
}
let kept_content_bytes = prefix_end + (s.len() - suffix_start);
let truncated_content_bytes = s.len().saturating_sub(kept_content_bytes);
let new_tokens = (truncated_content_bytes as u64).div_ceil(4);
if new_tokens == guess_tokens {
let mut out = String::with_capacity(marker_len + kept_content_bytes + 1);
out.push_str(&s[..prefix_end]);
out.push_str(&marker);
// Place marker on its own line for symmetry when we keep line boundaries.
out.push('\n');
out.push_str(&s[suffix_start..]);
return (out, Some(est_tokens));
}
guess_tokens = new_tokens;
}
// Fallback: use last guess to build output.
let marker = format!("{guess_tokens} tokens truncated…");
let marker_len = marker.len();
let keep_budget = max_bytes.saturating_sub(marker_len);
if keep_budget == 0 {
return (format!("{est_tokens} tokens truncated…"), Some(est_tokens));
}
let left_budget = keep_budget / 2;
let right_budget = keep_budget - left_budget;
let prefix_end = pick_prefix_end(s, left_budget);
let suffix_start = pick_suffix_start(s, right_budget);
let mut out = String::with_capacity(marker_len + prefix_end + (s.len() - suffix_start) + 1);
out.push_str(&s[..prefix_end]);
out.push_str(&marker);
out.push('\n');
out.push_str(&s[suffix_start..]);
(out, Some(est_tokens))
}
#[cfg(test)]
mod tests {
use super::*;
@@ -616,50 +516,4 @@ Output:
abc"#;
assert_eq!(expected, text);
}
#[test]
fn truncate_middle_no_newlines_fallback() {
// A long string with no newlines that exceeds the cap.
let s = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
let max_bytes = 16; // force truncation
let (out, original) = truncate_middle(s, max_bytes);
// For very small caps, we return the full, untruncated marker,
// even if it exceeds the cap.
assert_eq!(out, "…16 tokens truncated…");
// Original string length is 62 bytes => ceil(62/4) = 16 tokens.
assert_eq!(original, Some(16));
}
#[test]
fn truncate_middle_prefers_newline_boundaries() {
// Build a multi-line string of 20 numbered lines (each "NNN\n").
let mut s = String::new();
for i in 1..=20 {
s.push_str(&format!("{i:03}\n"));
}
// Total length: 20 lines * 4 bytes per line = 80 bytes.
assert_eq!(s.len(), 80);
// Choose a cap that forces truncation while leaving room for
// a few lines on each side after accounting for the marker.
let max_bytes = 64;
// Expect exact output: first 4 lines, marker, last 4 lines, and correct token estimate (80/4 = 20).
assert_eq!(
truncate_middle(&s, max_bytes),
(
r#"001
002
003
004
…12 tokens truncated…
017
018
019
020
"#
.to_string(),
Some(20)
)
);
}
}