chore: rework tools execution workflow (#5278)

Re-work the tool execution flow. Read `orchestrator.rs` to understand
the structure
This commit is contained in:
jif-oai
2025-10-20 20:57:37 +01:00
committed by GitHub
parent c84fc83222
commit 5e4f3bbb0b
59 changed files with 2630 additions and 3374 deletions

View File

@@ -36,6 +36,7 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
arg0: None,
};
let policy = SandboxPolicy::new_read_only_policy();

View File

@@ -1,229 +0,0 @@
#![cfg(unix)]
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::Duration;
use async_channel::Receiver;
use codex_core::error::CodexErr;
use codex_core::error::SandboxErr;
use codex_core::exec::ExecParams;
use codex_core::exec::SandboxType;
use codex_core::exec::StdoutStream;
use codex_core::exec::process_exec_tool_call;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandOutputDeltaEvent;
use codex_core::protocol::ExecOutputStream;
use codex_core::protocol::SandboxPolicy;
fn collect_stdout_events(rx: Receiver<Event>) -> Vec<u8> {
let mut out = Vec::new();
while let Ok(ev) = rx.try_recv() {
if let EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
stream: ExecOutputStream::Stdout,
chunk,
..
}) = ev.msg
{
out.extend_from_slice(&chunk);
}
}
out
}
#[tokio::test]
async fn test_exec_stdout_stream_events_echo() {
let (tx, rx) = async_channel::unbounded::<Event>();
let stdout_stream = StdoutStream {
sub_id: "test-sub".to_string(),
call_id: "call-1".to_string(),
tx_event: tx,
};
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
// Use printf for predictable behavior across shells
"printf 'hello-world\n'".to_string(),
];
let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
let params = ExecParams {
command: cmd,
cwd: cwd.clone(),
timeout_ms: Some(5_000),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(
params,
SandboxType::None,
&policy,
cwd.as_path(),
&None,
Some(stdout_stream),
)
.await;
let result = match result {
Ok(r) => r,
Err(e) => panic!("process_exec_tool_call failed: {e}"),
};
assert_eq!(result.exit_code, 0);
assert_eq!(result.stdout.text, "hello-world\n");
let streamed = collect_stdout_events(rx);
// We should have received at least the same contents (possibly in one chunk)
assert_eq!(String::from_utf8_lossy(&streamed), "hello-world\n");
}
#[tokio::test]
async fn test_exec_stderr_stream_events_echo() {
let (tx, rx) = async_channel::unbounded::<Event>();
let stdout_stream = StdoutStream {
sub_id: "test-sub".to_string(),
call_id: "call-2".to_string(),
tx_event: tx,
};
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
// Write to stderr explicitly
"printf 'oops\n' 1>&2".to_string(),
];
let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
let params = ExecParams {
command: cmd,
cwd: cwd.clone(),
timeout_ms: Some(5_000),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(
params,
SandboxType::None,
&policy,
cwd.as_path(),
&None,
Some(stdout_stream),
)
.await;
let result = match result {
Ok(r) => r,
Err(e) => panic!("process_exec_tool_call failed: {e}"),
};
assert_eq!(result.exit_code, 0);
assert_eq!(result.stdout.text, "");
assert_eq!(result.stderr.text, "oops\n");
// Collect only stderr delta events
let mut err = Vec::new();
while let Ok(ev) = rx.try_recv() {
if let EventMsg::ExecCommandOutputDelta(ExecCommandOutputDeltaEvent {
stream: ExecOutputStream::Stderr,
chunk,
..
}) = ev.msg
{
err.extend_from_slice(&chunk);
}
}
assert_eq!(String::from_utf8_lossy(&err), "oops\n");
}
#[tokio::test]
async fn test_aggregated_output_interleaves_in_order() {
// Spawn a shell that alternates stdout and stderr with sleeps to enforce order.
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
"printf 'O1\\n'; sleep 0.01; printf 'E1\\n' 1>&2; sleep 0.01; printf 'O2\\n'; sleep 0.01; printf 'E2\\n' 1>&2".to_string(),
];
let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
let params = ExecParams {
command: cmd,
cwd: cwd.clone(),
timeout_ms: Some(5_000),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(
params,
SandboxType::None,
&policy,
cwd.as_path(),
&None,
None,
)
.await
.expect("process_exec_tool_call");
assert_eq!(result.exit_code, 0);
assert_eq!(result.stdout.text, "O1\nO2\n");
assert_eq!(result.stderr.text, "E1\nE2\n");
assert_eq!(result.aggregated_output.text, "O1\nE1\nO2\nE2\n");
assert_eq!(result.aggregated_output.truncated_after_lines, None);
}
#[tokio::test]
async fn test_exec_timeout_returns_partial_output() {
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
"printf 'before\\n'; sleep 2; printf 'after\\n'".to_string(),
];
let cwd = std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
let params = ExecParams {
command: cmd,
cwd: cwd.clone(),
timeout_ms: Some(200),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,
};
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(
params,
SandboxType::None,
&policy,
cwd.as_path(),
&None,
None,
)
.await;
let Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) = result else {
panic!("expected timeout error");
};
assert_eq!(output.exit_code, 124);
assert_eq!(output.stdout.text, "before\n");
assert!(output.stderr.text.is_empty());
assert_eq!(output.aggregated_output.text, "before\n");
assert!(output.duration >= Duration::from_millis(200));
assert!(output.timed_out);
}

View File

@@ -9,7 +9,6 @@ mod client;
mod compact;
mod compact_resume_fork;
mod exec;
mod exec_stream_events;
mod fork_conversation;
mod grep_files;
mod json_result;

View File

@@ -815,11 +815,7 @@ async fn handle_container_exec_user_approved_records_tool_decision() {
mount_sse(
&server,
sse(vec![
ev_local_shell_call(
"user_approved_call",
"completed",
vec!["/bin/echo", "approved"],
),
ev_local_shell_call("user_approved_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)
@@ -881,11 +877,7 @@ async fn handle_container_exec_user_approved_for_session_records_tool_decision()
mount_sse(
&server,
sse(vec![
ev_local_shell_call(
"user_approved_session_call",
"completed",
vec!["/bin/echo", "persist"],
),
ev_local_shell_call("user_approved_session_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)
@@ -947,11 +939,7 @@ async fn handle_sandbox_error_user_approves_retry_records_tool_decision() {
mount_sse(
&server,
sse(vec![
ev_local_shell_call(
"sandbox_retry_call",
"completed",
vec!["/bin/echo", "retry"],
),
ev_local_shell_call("sandbox_retry_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)
@@ -1013,7 +1001,7 @@ async fn handle_container_exec_user_denies_records_tool_decision() {
mount_sse(
&server,
sse(vec![
ev_local_shell_call("user_denied_call", "completed", vec!["/bin/echo", "deny"]),
ev_local_shell_call("user_denied_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)
@@ -1075,11 +1063,7 @@ async fn handle_sandbox_error_user_approves_for_session_records_tool_decision()
mount_sse(
&server,
sse(vec![
ev_local_shell_call(
"sandbox_session_call",
"completed",
vec!["/bin/echo", "persist"],
),
ev_local_shell_call("sandbox_session_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)
@@ -1141,7 +1125,7 @@ async fn handle_sandbox_error_user_denies_records_tool_decision() {
mount_sse(
&server,
sse(vec![
ev_local_shell_call("sandbox_deny_call", "completed", vec!["/bin/echo", "deny"]),
ev_local_shell_call("sandbox_deny_call", "completed", vec!["/bin/date"]),
ev_completed("done"),
]),
)