chore: align unified_exec (#5442)

Align `unified_exec` with b implementation
This commit is contained in:
jif-oai
2025-10-22 11:50:18 +01:00
committed by GitHub
parent 53cadb4df6
commit 00b1e130b3
8 changed files with 1060 additions and 596 deletions

View File

@@ -320,14 +320,22 @@ async fn unified_exec_spec_toggle_end_to_end() -> Result<()> {
let tools_disabled = collect_tools(false).await?;
assert!(
!tools_disabled.iter().any(|name| name == "unified_exec"),
"tools list should not include unified_exec when disabled: {tools_disabled:?}"
!tools_disabled.iter().any(|name| name == "exec_command"),
"tools list should not include exec_command when disabled: {tools_disabled:?}"
);
assert!(
!tools_disabled.iter().any(|name| name == "write_stdin"),
"tools list should not include write_stdin when disabled: {tools_disabled:?}"
);
let tools_enabled = collect_tools(true).await?;
assert!(
tools_enabled.iter().any(|name| name == "unified_exec"),
"tools list should include unified_exec when enabled: {tools_enabled:?}"
tools_enabled.iter().any(|name| name == "exec_command"),
"tools list should include exec_command when enabled: {tools_enabled:?}"
);
assert!(
tools_enabled.iter().any(|name| name == "write_stdin"),
"tools list should include write_stdin when enabled: {tools_enabled:?}"
);
Ok(())

View File

@@ -4,7 +4,6 @@ use std::collections::HashMap;
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::parse_command::parse_command;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@@ -81,16 +80,15 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
} = builder.build(&server).await?;
let call_id = "uexec-begin-event";
let command = vec!["/bin/echo".to_string(), "hello unified exec".to_string()];
let args = json!({
"input": command.clone(),
"timeout_ms": 250,
"cmd": "/bin/echo hello unified exec".to_string(),
"yield_time_ms": 250,
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "unified_exec", &serde_json::to_string(&args)?),
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
@@ -124,9 +122,11 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
})
.await;
assert_eq!(begin_event.command, command);
assert_eq!(
begin_event.command,
vec!["/bin/echo hello unified exec".to_string()]
);
assert_eq!(begin_event.cwd, cwd.path());
assert_eq!(begin_event.parsed_cmd, parse_command(&command));
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
@@ -154,14 +154,9 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
} = builder.build(&server).await?;
let open_call_id = "uexec-open-session";
let open_command = vec![
"/bin/sh".to_string(),
"-c".to_string(),
"echo ready".to_string(),
];
let open_args = json!({
"input": open_command.clone(),
"timeout_ms": 200,
"cmd": "/bin/sh -c echo ready".to_string(),
"yield_time_ms": 250,
});
let poll_call_id = "uexec-poll-empty";
@@ -176,7 +171,7 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
ev_response_created("resp-1"),
ev_function_call(
open_call_id,
"unified_exec",
"exec_command",
&serde_json::to_string(&open_args)?,
),
ev_completed("resp-1"),
@@ -185,7 +180,7 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
ev_response_created("resp-2"),
ev_function_call(
poll_call_id,
"unified_exec",
"write_stdin",
&serde_json::to_string(&poll_args)?,
),
ev_completed("resp-2"),
@@ -231,7 +226,292 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
"expected only the initial command to emit begin event"
);
assert_eq!(begin_events[0].call_id, open_call_id);
assert_eq!(begin_events[0].command, open_command);
assert_eq!(begin_events[0].command[0], "/bin/sh -c echo ready");
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.features.enable(Feature::UnifiedExec);
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let call_id = "uexec-metadata";
let args = serde_json::json!({
"cmd": "printf 'abcdefghijklmnopqrstuvwxyz'",
"yield_time_ms": 500,
"max_output_tokens": 6,
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
];
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "run metadata test".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let outputs = collect_tool_outputs(&bodies)?;
let metadata = outputs
.get(call_id)
.expect("missing exec_command metadata output");
let chunk_id = metadata
.get("chunk_id")
.and_then(Value::as_str)
.expect("missing chunk_id");
assert_eq!(chunk_id.len(), 6, "chunk id should be 6 hex characters");
assert!(
chunk_id.chars().all(|c| c.is_ascii_hexdigit()),
"chunk id should be hexadecimal: {chunk_id}"
);
let wall_time = metadata
.get("wall_time_seconds")
.and_then(Value::as_f64)
.unwrap_or_default();
assert!(
wall_time >= 0.0,
"wall_time_seconds should be non-negative, got {wall_time}"
);
assert!(
metadata.get("session_id").is_none(),
"exec_command for a completed process should not include session_id"
);
let exit_code = metadata
.get("exit_code")
.and_then(Value::as_i64)
.expect("expected exit_code");
assert_eq!(exit_code, 0, "expected successful exit");
let output_text = metadata
.get("output")
.and_then(Value::as_str)
.expect("missing output text");
assert!(
output_text.contains("tokens truncated"),
"expected truncation notice in output: {output_text:?}"
);
let original_tokens = metadata
.get("original_token_count")
.and_then(Value::as_u64)
.expect("missing original_token_count");
assert!(
original_tokens as usize > 6,
"original token count should exceed max_output_tokens"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn write_stdin_returns_exit_metadata_and_clears_session() -> Result<()> {
skip_if_no_network!(Ok(()));
skip_if_sandbox!(Ok(()));
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.features.enable(Feature::UnifiedExec);
});
let TestCodex {
codex,
cwd,
session_configured,
..
} = builder.build(&server).await?;
let start_call_id = "uexec-cat-start";
let send_call_id = "uexec-cat-send";
let exit_call_id = "uexec-cat-exit";
let start_args = serde_json::json!({
"cmd": "/bin/cat",
"yield_time_ms": 500,
});
let send_args = serde_json::json!({
"chars": "hello unified exec\n",
"session_id": 0,
"yield_time_ms": 500,
});
let exit_args = serde_json::json!({
"chars": "\u{0004}",
"session_id": 0,
"yield_time_ms": 500,
});
let responses = vec![
sse(vec![
ev_response_created("resp-1"),
ev_function_call(
start_call_id,
"exec_command",
&serde_json::to_string(&start_args)?,
),
ev_completed("resp-1"),
]),
sse(vec![
ev_response_created("resp-2"),
ev_function_call(
send_call_id,
"write_stdin",
&serde_json::to_string(&send_args)?,
),
ev_completed("resp-2"),
]),
sse(vec![
ev_response_created("resp-3"),
ev_function_call(
exit_call_id,
"write_stdin",
&serde_json::to_string(&exit_args)?,
),
ev_completed("resp-3"),
]),
sse(vec![
ev_assistant_message("msg-1", "all done"),
ev_completed("resp-4"),
]),
];
mount_sse_sequence(&server, responses).await;
let session_model = session_configured.model.clone();
codex
.submit(Op::UserTurn {
items: vec![UserInput::Text {
text: "test write_stdin exit behavior".into(),
}],
final_output_json_schema: None,
cwd: cwd.path().to_path_buf(),
approval_policy: AskForApproval::Never,
sandbox_policy: SandboxPolicy::DangerFullAccess,
model: session_model,
effort: None,
summary: ReasoningSummary::Auto,
})
.await?;
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
let requests = server.received_requests().await.expect("recorded requests");
assert!(!requests.is_empty(), "expected at least one POST request");
let bodies = requests
.iter()
.map(|req| req.body_json::<Value>().expect("request json"))
.collect::<Vec<_>>();
let outputs = collect_tool_outputs(&bodies)?;
let start_output = outputs
.get(start_call_id)
.expect("missing start output for exec_command");
let session_id = start_output
.get("session_id")
.and_then(Value::as_i64)
.expect("expected session id from exec_command");
assert!(
session_id >= 0,
"session_id should be non-negative, got {session_id}"
);
assert!(
start_output.get("exit_code").is_none(),
"initial exec_command should not include exit_code while session is running"
);
let send_output = outputs
.get(send_call_id)
.expect("missing write_stdin echo output");
let echoed = send_output
.get("output")
.and_then(Value::as_str)
.unwrap_or_default();
assert!(
echoed.contains("hello unified exec"),
"expected echoed output from cat, got {echoed:?}"
);
let echoed_session = send_output
.get("session_id")
.and_then(Value::as_i64)
.expect("write_stdin should return session id while process is running");
assert_eq!(
echoed_session, session_id,
"write_stdin should reuse existing session id"
);
assert!(
send_output.get("exit_code").is_none(),
"write_stdin should not include exit_code while process is running"
);
let exit_output = outputs
.get(exit_call_id)
.expect("missing exit metadata output");
assert!(
exit_output.get("session_id").is_none(),
"session_id should be omitted once the process exits"
);
let exit_code = exit_output
.get("exit_code")
.and_then(Value::as_i64)
.expect("expected exit_code after sending EOF");
assert_eq!(exit_code, 0, "cat should exit cleanly after EOF");
let exit_chunk = exit_output
.get("chunk_id")
.and_then(Value::as_str)
.expect("missing chunk id for exit output");
assert!(
exit_chunk.chars().all(|c| c.is_ascii_hexdigit()),
"chunk id should be hexadecimal: {exit_chunk}"
);
Ok(())
}
@@ -255,15 +535,15 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
let first_call_id = "uexec-start";
let first_args = serde_json::json!({
"input": ["/bin/cat"],
"timeout_ms": 200,
"cmd": "/bin/cat",
"yield_time_ms": 200,
});
let second_call_id = "uexec-stdin";
let second_args = serde_json::json!({
"input": ["hello unified exec\n"],
"session_id": "0",
"timeout_ms": 500,
"chars": "hello unified exec\n",
"session_id": 0,
"yield_time_ms": 500,
});
let responses = vec![
@@ -271,7 +551,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
ev_response_created("resp-1"),
ev_function_call(
first_call_id,
"unified_exec",
"exec_command",
&serde_json::to_string(&first_args)?,
),
ev_completed("resp-1"),
@@ -280,7 +560,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
ev_response_created("resp-2"),
ev_function_call(
second_call_id,
"unified_exec",
"write_stdin",
&serde_json::to_string(&second_args)?,
),
ev_completed("resp-2"),
@@ -324,9 +604,9 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
let start_output = outputs
.get(first_call_id)
.expect("missing first unified_exec output");
let session_id = start_output["session_id"].as_str().unwrap_or_default();
let session_id = start_output["session_id"].as_i64().unwrap_or_default();
assert!(
!session_id.is_empty(),
session_id >= 0,
"expected session id in first unified_exec response"
);
assert!(
@@ -340,7 +620,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
.get(second_call_id)
.expect("missing reused unified_exec output");
assert_eq!(
reuse_output["session_id"].as_str().unwrap_or_default(),
reuse_output["session_id"].as_i64().unwrap_or_default(),
session_id
);
let echoed = reuse_output["output"].as_str().unwrap_or_default();
@@ -391,15 +671,15 @@ PY
let first_call_id = "uexec-lag-start";
let first_args = serde_json::json!({
"input": ["/bin/sh", "-c", script],
"timeout_ms": 25,
"cmd": script,
"yield_time_ms": 25,
});
let second_call_id = "uexec-lag-poll";
let second_args = serde_json::json!({
"input": Vec::<String>::new(),
"session_id": "0",
"timeout_ms": 2_000,
"chars": "",
"session_id": 0,
"yield_time_ms": 2_000,
});
let responses = vec![
@@ -407,7 +687,7 @@ PY
ev_response_created("resp-1"),
ev_function_call(
first_call_id,
"unified_exec",
"exec_command",
&serde_json::to_string(&first_args)?,
),
ev_completed("resp-1"),
@@ -416,7 +696,7 @@ PY
ev_response_created("resp-2"),
ev_function_call(
second_call_id,
"unified_exec",
"write_stdin",
&serde_json::to_string(&second_args)?,
),
ev_completed("resp-2"),
@@ -460,9 +740,9 @@ PY
let start_output = outputs
.get(first_call_id)
.expect("missing initial unified_exec output");
let session_id = start_output["session_id"].as_str().unwrap_or_default();
let session_id = start_output["session_id"].as_i64().unwrap_or_default();
assert!(
!session_id.is_empty(),
session_id >= 0,
"expected session id from initial unified_exec response"
);
@@ -497,15 +777,15 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
let first_call_id = "uexec-timeout";
let first_args = serde_json::json!({
"input": ["/bin/sh", "-c", "sleep 0.1; echo ready"],
"timeout_ms": 10,
"cmd": "sleep 0.5; echo ready",
"yield_time_ms": 10,
});
let second_call_id = "uexec-poll";
let second_args = serde_json::json!({
"input": Vec::<String>::new(),
"session_id": "0",
"timeout_ms": 800,
"chars": "",
"session_id": 0,
"yield_time_ms": 800,
});
let responses = vec![
@@ -513,7 +793,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
ev_response_created("resp-1"),
ev_function_call(
first_call_id,
"unified_exec",
"exec_command",
&serde_json::to_string(&first_args)?,
),
ev_completed("resp-1"),
@@ -522,7 +802,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
ev_response_created("resp-2"),
ev_function_call(
second_call_id,
"unified_exec",
"write_stdin",
&serde_json::to_string(&second_args)?,
),
ev_completed("resp-2"),
@@ -569,7 +849,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
let outputs = collect_tool_outputs(&bodies)?;
let first_output = outputs.get(first_call_id).expect("missing timeout output");
assert_eq!(first_output["session_id"], "0");
assert_eq!(first_output["session_id"], 0);
assert!(
first_output["output"]
.as_str()