feat: stream exec stdout events (#1786)

## Summary
- stream command stdout as `ExecCommandStdout` events
- forward streamed stdout to clients and ignore in human output
processor
- adjust call sites for new streaming API
This commit is contained in:
aibrahim-oai
2025-08-01 13:04:34 -07:00
committed by GitHub
parent 8360c6a3ec
commit bc7beddaa2
11 changed files with 238 additions and 5 deletions

10
codex-rs/Cargo.lock generated
View File

@@ -695,6 +695,7 @@ dependencies = [
"reqwest",
"seccompiler",
"serde",
"serde_bytes",
"serde_json",
"sha1",
"shlex",
@@ -3951,6 +3952,15 @@ dependencies = [
"serde_derive",
]
[[package]]
name = "serde_bytes"
version = "0.11.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96"
dependencies = [
"serde",
]
[[package]]
name = "serde_derive"
version = "1.0.219"

View File

@@ -31,6 +31,7 @@ rand = "0.9"
reqwest = { version = "0.12", features = ["json", "stream"] }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_bytes = "0.11"
sha1 = "0.10.6"
shlex = "1.3.0"
strum_macros = "0.27.2"

View File

@@ -48,6 +48,7 @@ use crate::error::SandboxErr;
use crate::exec::ExecParams;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StdoutStream;
use crate::exec::process_exec_tool_call;
use crate::exec_env::create_env;
use crate::mcp_connection_manager::McpConnectionManager;
@@ -1759,6 +1760,11 @@ async fn handle_container_exec_with_params(
sess.ctrl_c.clone(),
&sess.sandbox_policy,
&sess.codex_linux_sandbox_exe,
Some(StdoutStream {
sub_id: sub_id.clone(),
call_id: call_id.clone(),
tx_event: sess.tx_event.clone(),
}),
)
.await;
@@ -1879,6 +1885,11 @@ async fn handle_sandbox_error(
sess.ctrl_c.clone(),
&sess.sandbox_policy,
&sess.codex_linux_sandbox_exe,
Some(StdoutStream {
sub_id: sub_id.clone(),
call_id: call_id.clone(),
tx_event: sess.tx_event.clone(),
}),
)
.await;

View File

@@ -10,6 +10,7 @@ use std::sync::Arc;
use std::time::Duration;
use std::time::Instant;
use async_channel::Sender;
use tokio::io::AsyncRead;
use tokio::io::AsyncReadExt;
use tokio::io::BufReader;
@@ -19,10 +20,15 @@ use tokio::sync::Notify;
use crate::error::CodexErr;
use crate::error::Result;
use crate::error::SandboxErr;
use crate::protocol::Event;
use crate::protocol::EventMsg;
use crate::protocol::ExecCommandStderrDeltaEvent;
use crate::protocol::ExecCommandStdoutDeltaEvent;
use crate::protocol::SandboxPolicy;
use crate::seatbelt::spawn_command_under_seatbelt;
use crate::spawn::StdioPolicy;
use crate::spawn::spawn_child_async;
use serde_bytes::ByteBuf;
// Maximum we send for each stream, which is either:
// - 10KiB OR
@@ -56,18 +62,26 @@ pub enum SandboxType {
LinuxSeccomp,
}
#[derive(Clone)]
pub struct StdoutStream {
pub sub_id: String,
pub call_id: String,
pub tx_event: Sender<Event>,
}
pub async fn process_exec_tool_call(
params: ExecParams,
sandbox_type: SandboxType,
ctrl_c: Arc<Notify>,
sandbox_policy: &SandboxPolicy,
codex_linux_sandbox_exe: &Option<PathBuf>,
stdout_stream: Option<StdoutStream>,
) -> Result<ExecToolCallOutput> {
let start = Instant::now();
let raw_output_result: std::result::Result<RawExecToolCallOutput, CodexErr> = match sandbox_type
{
SandboxType::None => exec(params, sandbox_policy, ctrl_c).await,
SandboxType::None => exec(params, sandbox_policy, ctrl_c, stdout_stream.clone()).await,
SandboxType::MacosSeatbelt => {
let ExecParams {
command,
@@ -83,7 +97,7 @@ pub async fn process_exec_tool_call(
env,
)
.await?;
consume_truncated_output(child, ctrl_c, timeout_ms).await
consume_truncated_output(child, ctrl_c, timeout_ms, stdout_stream.clone()).await
}
SandboxType::LinuxSeccomp => {
let ExecParams {
@@ -106,7 +120,7 @@ pub async fn process_exec_tool_call(
)
.await?;
consume_truncated_output(child, ctrl_c, timeout_ms).await
consume_truncated_output(child, ctrl_c, timeout_ms, stdout_stream).await
}
};
let duration = start.elapsed();
@@ -233,6 +247,7 @@ async fn exec(
}: ExecParams,
sandbox_policy: &SandboxPolicy,
ctrl_c: Arc<Notify>,
stdout_stream: Option<StdoutStream>,
) -> Result<RawExecToolCallOutput> {
let (program, args) = command.split_first().ok_or_else(|| {
CodexErr::Io(io::Error::new(
@@ -251,7 +266,7 @@ async fn exec(
env,
)
.await?;
consume_truncated_output(child, ctrl_c, timeout_ms).await
consume_truncated_output(child, ctrl_c, timeout_ms, stdout_stream).await
}
/// Consumes the output of a child process, truncating it so it is suitable for
@@ -260,6 +275,7 @@ pub(crate) async fn consume_truncated_output(
mut child: Child,
ctrl_c: Arc<Notify>,
timeout_ms: Option<u64>,
stdout_stream: Option<StdoutStream>,
) -> Result<RawExecToolCallOutput> {
// Both stdout and stderr were configured with `Stdio::piped()`
// above, therefore `take()` should normally return `Some`. If it doesn't
@@ -280,11 +296,15 @@ pub(crate) async fn consume_truncated_output(
BufReader::new(stdout_reader),
MAX_STREAM_OUTPUT,
MAX_STREAM_OUTPUT_LINES,
stdout_stream.clone(),
false,
));
let stderr_handle = tokio::spawn(read_capped(
BufReader::new(stderr_reader),
MAX_STREAM_OUTPUT,
MAX_STREAM_OUTPUT_LINES,
stdout_stream.clone(),
true,
));
let interrupted = ctrl_c.notified();
@@ -318,10 +338,12 @@ pub(crate) async fn consume_truncated_output(
})
}
async fn read_capped<R: AsyncRead + Unpin>(
async fn read_capped<R: AsyncRead + Unpin + Send + 'static>(
mut reader: R,
max_output: usize,
max_lines: usize,
stream: Option<StdoutStream>,
is_stderr: bool,
) -> io::Result<Vec<u8>> {
let mut buf = Vec::with_capacity(max_output.min(8 * 1024));
let mut tmp = [0u8; 8192];
@@ -335,6 +357,27 @@ async fn read_capped<R: AsyncRead + Unpin>(
break;
}
if let Some(stream) = &stream {
let chunk = tmp[..n].to_vec();
let msg = if is_stderr {
EventMsg::ExecCommandStderrDelta(ExecCommandStderrDeltaEvent {
call_id: stream.call_id.clone(),
chunk: ByteBuf::from(chunk),
})
} else {
EventMsg::ExecCommandStdoutDelta(ExecCommandStdoutDeltaEvent {
call_id: stream.call_id.clone(),
chunk: ByteBuf::from(chunk),
})
};
let event = Event {
id: stream.sub_id.clone(),
msg,
};
#[allow(clippy::let_unit_value)]
let _ = stream.tx_event.send(event).await;
}
// Copy into the buffer only while we still have byte and line budget.
if remaining_bytes > 0 && remaining_lines > 0 {
let mut copy_len = 0;

View File

@@ -13,6 +13,7 @@ use std::time::Duration;
use mcp_types::CallToolResult;
use serde::Deserialize;
use serde::Serialize;
use serde_bytes::ByteBuf;
use strum_macros::Display;
use uuid::Uuid;
@@ -323,6 +324,12 @@ pub enum EventMsg {
/// Notification that the server is about to execute a command.
ExecCommandBegin(ExecCommandBeginEvent),
/// Incremental chunk of stdout from a running command.
ExecCommandStdoutDelta(ExecCommandStdoutDeltaEvent),
/// Incremental chunk of stderr from a running command.
ExecCommandStderrDelta(ExecCommandStderrDeltaEvent),
ExecCommandEnd(ExecCommandEndEvent),
ExecApprovalRequest(ExecApprovalRequestEvent),
@@ -476,6 +483,24 @@ pub struct ExecCommandEndEvent {
pub exit_code: i32,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ExecCommandStdoutDeltaEvent {
/// Identifier for the ExecCommandBegin that produced this chunk.
pub call_id: String,
/// Raw stdout bytes (may not be valid UTF-8).
#[serde(with = "serde_bytes")]
pub chunk: ByteBuf,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ExecCommandStderrDeltaEvent {
/// Identifier for the ExecCommandBegin that produced this chunk.
pub call_id: String,
/// Raw stderr bytes (may not be valid UTF-8).
#[serde(with = "serde_bytes")]
pub chunk: ByteBuf,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ExecApprovalRequestEvent {
/// Identifier for the associated exec call, if available.

View File

@@ -220,6 +220,7 @@ mod tests {
Arc::new(Notify::new()),
&SandboxPolicy::DangerFullAccess,
&None,
None,
)
.await
.unwrap();

View File

@@ -0,0 +1,135 @@
#![cfg(unix)]
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use async_channel::Receiver;
use codex_core::exec::ExecParams;
use codex_core::exec::SandboxType;
use codex_core::exec::StdoutStream;
use codex_core::exec::process_exec_tool_call;
use codex_core::protocol::Event;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandStderrDeltaEvent;
use codex_core::protocol::ExecCommandStdoutDeltaEvent;
use codex_core::protocol::SandboxPolicy;
use tokio::sync::Notify;
fn collect_stdout_events(rx: Receiver<Event>) -> Vec<u8> {
let mut out = Vec::new();
while let Ok(ev) = rx.try_recv() {
if let EventMsg::ExecCommandStdoutDelta(ExecCommandStdoutDeltaEvent { chunk, .. }) = ev.msg
{
out.extend_from_slice(&chunk);
}
}
out
}
#[tokio::test]
async fn test_exec_stdout_stream_events_echo() {
let (tx, rx) = async_channel::unbounded::<Event>();
let stdout_stream = StdoutStream {
sub_id: "test-sub".to_string(),
call_id: "call-1".to_string(),
tx_event: tx,
};
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
// Use printf for predictable behavior across shells
"printf 'hello-world\n'".to_string(),
];
let params = ExecParams {
command: cmd,
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
timeout_ms: Some(5_000),
env: HashMap::new(),
};
let ctrl_c = Arc::new(Notify::new());
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(
params,
SandboxType::None,
ctrl_c,
&policy,
&None,
Some(stdout_stream),
)
.await;
let result = match result {
Ok(r) => r,
Err(e) => panic!("process_exec_tool_call failed: {e}"),
};
assert_eq!(result.exit_code, 0);
assert_eq!(result.stdout, "hello-world\n");
let streamed = collect_stdout_events(rx);
// We should have received at least the same contents (possibly in one chunk)
assert_eq!(String::from_utf8_lossy(&streamed), "hello-world\n");
}
#[tokio::test]
async fn test_exec_stderr_stream_events_echo() {
let (tx, rx) = async_channel::unbounded::<Event>();
let stdout_stream = StdoutStream {
sub_id: "test-sub".to_string(),
call_id: "call-2".to_string(),
tx_event: tx,
};
let cmd = vec![
"/bin/sh".to_string(),
"-c".to_string(),
// Write to stderr explicitly
"printf 'oops\n' 1>&2".to_string(),
];
let params = ExecParams {
command: cmd,
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
timeout_ms: Some(5_000),
env: HashMap::new(),
};
let ctrl_c = Arc::new(Notify::new());
let policy = SandboxPolicy::new_read_only_policy();
let result = process_exec_tool_call(
params,
SandboxType::None,
ctrl_c,
&policy,
&None,
Some(stdout_stream),
)
.await;
let result = match result {
Ok(r) => r,
Err(e) => panic!("process_exec_tool_call failed: {e}"),
};
assert_eq!(result.exit_code, 0);
assert_eq!(result.stdout, "");
assert_eq!(result.stderr, "oops\n");
// Collect only stderr delta events
let mut err = Vec::new();
while let Ok(ev) = rx.try_recv() {
if let EventMsg::ExecCommandStderrDelta(ExecCommandStderrDeltaEvent { chunk, .. }) = ev.msg
{
err.extend_from_slice(&chunk);
}
}
assert_eq!(String::from_utf8_lossy(&err), "oops\n");
}

View File

@@ -239,6 +239,8 @@ impl EventProcessor for EventProcessorWithHumanOutput {
cwd.to_string_lossy(),
);
}
EventMsg::ExecCommandStdoutDelta(_) => {}
EventMsg::ExecCommandStderrDelta(_) => {}
EventMsg::ExecCommandEnd(ExecCommandEndEvent {
call_id,
stdout,

View File

@@ -59,6 +59,7 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
ctrl_c,
&sandbox_policy,
&codex_linux_sandbox_exe,
None,
)
.await
.unwrap();
@@ -149,6 +150,7 @@ async fn assert_network_blocked(cmd: &[&str]) {
ctrl_c,
&sandbox_policy,
&codex_linux_sandbox_exe,
None,
)
.await;

View File

@@ -258,6 +258,8 @@ async fn run_codex_tool_session_inner(
| EventMsg::McpToolCallBegin(_)
| EventMsg::McpToolCallEnd(_)
| EventMsg::ExecCommandBegin(_)
| EventMsg::ExecCommandStdoutDelta(_)
| EventMsg::ExecCommandStderrDelta(_)
| EventMsg::ExecCommandEnd(_)
| EventMsg::BackgroundEvent(_)
| EventMsg::PatchApplyBegin(_)

View File

@@ -374,6 +374,7 @@ impl ChatWidget<'_> {
);
self.add_to_history(HistoryCell::new_active_exec_command(command));
}
EventMsg::ExecCommandStdoutDelta(_) => {}
EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
call_id: _,
auto_approved,