chore: align unified_exec (#5442)

Align `unified_exec` with b implementation
2025-10-22 11:50:18 +01:00
parent 53cadb4df6
commit 00b1e130b3
8 changed files with 1060 additions and 596 deletions
--- a/codex-rs/core/tests/suite/tools.rs
+++ b/codex-rs/core/tests/suite/tools.rs
@@ -320,14 +320,22 @@ async fn unified_exec_spec_toggle_end_to_end() -> Result<()> {

    let tools_disabled = collect_tools(false).await?;
    assert!(
-        !tools_disabled.iter().any(|name| name == "unified_exec"),
-        "tools list should not include unified_exec when disabled: {tools_disabled:?}"
+        !tools_disabled.iter().any(|name| name == "exec_command"),
+        "tools list should not include exec_command when disabled: {tools_disabled:?}"
+    );
+    assert!(
+        !tools_disabled.iter().any(|name| name == "write_stdin"),
+        "tools list should not include write_stdin when disabled: {tools_disabled:?}"
    );

    let tools_enabled = collect_tools(true).await?;
    assert!(
-        tools_enabled.iter().any(|name| name == "unified_exec"),
-        "tools list should include unified_exec when enabled: {tools_enabled:?}"
+        tools_enabled.iter().any(|name| name == "exec_command"),
+        "tools list should include exec_command when enabled: {tools_enabled:?}"
+    );
+    assert!(
+        tools_enabled.iter().any(|name| name == "write_stdin"),
+        "tools list should include write_stdin when enabled: {tools_enabled:?}"
    );

    Ok(())
--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -4,7 +4,6 @@ use std::collections::HashMap;

 use anyhow::Result;
 use codex_core::features::Feature;
-use codex_core::parse_command::parse_command;
 use codex_core::protocol::AskForApproval;
 use codex_core::protocol::EventMsg;
 use codex_core::protocol::Op;
@@ -81,16 +80,15 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
    } = builder.build(&server).await?;

    let call_id = "uexec-begin-event";
-    let command = vec!["/bin/echo".to_string(), "hello unified exec".to_string()];
    let args = json!({
-        "input": command.clone(),
-        "timeout_ms": 250,
+        "cmd": "/bin/echo hello unified exec".to_string(),
+        "yield_time_ms": 250,
    });

    let responses = vec![
        sse(vec![
            ev_response_created("resp-1"),
-            ev_function_call(call_id, "unified_exec", &serde_json::to_string(&args)?),
+            ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
        sse(vec![
@@ -124,9 +122,11 @@ async fn unified_exec_emits_exec_command_begin_event() -> Result<()> {
    })
    .await;

-    assert_eq!(begin_event.command, command);
+    assert_eq!(
+        begin_event.command,
+        vec!["/bin/echo hello unified exec".to_string()]
+    );
    assert_eq!(begin_event.cwd, cwd.path());
-    assert_eq!(begin_event.parsed_cmd, parse_command(&command));

    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;

@@ -154,14 +154,9 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
    } = builder.build(&server).await?;

    let open_call_id = "uexec-open-session";
-    let open_command = vec![
-        "/bin/sh".to_string(),
-        "-c".to_string(),
-        "echo ready".to_string(),
-    ];
    let open_args = json!({
-        "input": open_command.clone(),
-        "timeout_ms": 200,
+        "cmd": "/bin/sh -c echo ready".to_string(),
+        "yield_time_ms": 250,
    });

    let poll_call_id = "uexec-poll-empty";
@@ -176,7 +171,7 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
            ev_response_created("resp-1"),
            ev_function_call(
                open_call_id,
-                "unified_exec",
+                "exec_command",
                &serde_json::to_string(&open_args)?,
            ),
            ev_completed("resp-1"),
@@ -185,7 +180,7 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
            ev_response_created("resp-2"),
            ev_function_call(
                poll_call_id,
-                "unified_exec",
+                "write_stdin",
                &serde_json::to_string(&poll_args)?,
            ),
            ev_completed("resp-2"),
@@ -231,7 +226,292 @@ async fn unified_exec_skips_begin_event_for_empty_input() -> Result<()> {
        "expected only the initial command to emit begin event"
    );
    assert_eq!(begin_events[0].call_id, open_call_id);
-    assert_eq!(begin_events[0].command, open_command);
+    assert_eq!(begin_events[0].command[0], "/bin/sh -c echo ready");
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn exec_command_reports_chunk_and_exit_metadata() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_sandbox!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.features.enable(Feature::UnifiedExec);
+    });
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = builder.build(&server).await?;
+
+    let call_id = "uexec-metadata";
+    let args = serde_json::json!({
+        "cmd": "printf 'abcdefghijklmnopqrstuvwxyz'",
+        "yield_time_ms": 500,
+        "max_output_tokens": 6,
+    });
+
+    let responses = vec![
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "run metadata test".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
+
+    let requests = server.received_requests().await.expect("recorded requests");
+    assert!(!requests.is_empty(), "expected at least one POST request");
+
+    let bodies = requests
+        .iter()
+        .map(|req| req.body_json::<Value>().expect("request json"))
+        .collect::<Vec<_>>();
+
+    let outputs = collect_tool_outputs(&bodies)?;
+    let metadata = outputs
+        .get(call_id)
+        .expect("missing exec_command metadata output");
+
+    let chunk_id = metadata
+        .get("chunk_id")
+        .and_then(Value::as_str)
+        .expect("missing chunk_id");
+    assert_eq!(chunk_id.len(), 6, "chunk id should be 6 hex characters");
+    assert!(
+        chunk_id.chars().all(|c| c.is_ascii_hexdigit()),
+        "chunk id should be hexadecimal: {chunk_id}"
+    );
+
+    let wall_time = metadata
+        .get("wall_time_seconds")
+        .and_then(Value::as_f64)
+        .unwrap_or_default();
+    assert!(
+        wall_time >= 0.0,
+        "wall_time_seconds should be non-negative, got {wall_time}"
+    );
+
+    assert!(
+        metadata.get("session_id").is_none(),
+        "exec_command for a completed process should not include session_id"
+    );
+
+    let exit_code = metadata
+        .get("exit_code")
+        .and_then(Value::as_i64)
+        .expect("expected exit_code");
+    assert_eq!(exit_code, 0, "expected successful exit");
+
+    let output_text = metadata
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("missing output text");
+    assert!(
+        output_text.contains("tokens truncated"),
+        "expected truncation notice in output: {output_text:?}"
+    );
+
+    let original_tokens = metadata
+        .get("original_token_count")
+        .and_then(Value::as_u64)
+        .expect("missing original_token_count");
+    assert!(
+        original_tokens as usize > 6,
+        "original token count should exceed max_output_tokens"
+    );
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn write_stdin_returns_exit_metadata_and_clears_session() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+    skip_if_sandbox!(Ok(()));
+
+    let server = start_mock_server().await;
+
+    let mut builder = test_codex().with_config(|config| {
+        config.features.enable(Feature::UnifiedExec);
+    });
+    let TestCodex {
+        codex,
+        cwd,
+        session_configured,
+        ..
+    } = builder.build(&server).await?;
+
+    let start_call_id = "uexec-cat-start";
+    let send_call_id = "uexec-cat-send";
+    let exit_call_id = "uexec-cat-exit";
+
+    let start_args = serde_json::json!({
+        "cmd": "/bin/cat",
+        "yield_time_ms": 500,
+    });
+    let send_args = serde_json::json!({
+        "chars": "hello unified exec\n",
+        "session_id": 0,
+        "yield_time_ms": 500,
+    });
+    let exit_args = serde_json::json!({
+        "chars": "\u{0004}",
+        "session_id": 0,
+        "yield_time_ms": 500,
+    });
+
+    let responses = vec![
+        sse(vec![
+            ev_response_created("resp-1"),
+            ev_function_call(
+                start_call_id,
+                "exec_command",
+                &serde_json::to_string(&start_args)?,
+            ),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_response_created("resp-2"),
+            ev_function_call(
+                send_call_id,
+                "write_stdin",
+                &serde_json::to_string(&send_args)?,
+            ),
+            ev_completed("resp-2"),
+        ]),
+        sse(vec![
+            ev_response_created("resp-3"),
+            ev_function_call(
+                exit_call_id,
+                "write_stdin",
+                &serde_json::to_string(&exit_args)?,
+            ),
+            ev_completed("resp-3"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "all done"),
+            ev_completed("resp-4"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    let session_model = session_configured.model.clone();
+
+    codex
+        .submit(Op::UserTurn {
+            items: vec![UserInput::Text {
+                text: "test write_stdin exit behavior".into(),
+            }],
+            final_output_json_schema: None,
+            cwd: cwd.path().to_path_buf(),
+            approval_policy: AskForApproval::Never,
+            sandbox_policy: SandboxPolicy::DangerFullAccess,
+            model: session_model,
+            effort: None,
+            summary: ReasoningSummary::Auto,
+        })
+        .await?;
+
+    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
+
+    let requests = server.received_requests().await.expect("recorded requests");
+    assert!(!requests.is_empty(), "expected at least one POST request");
+
+    let bodies = requests
+        .iter()
+        .map(|req| req.body_json::<Value>().expect("request json"))
+        .collect::<Vec<_>>();
+
+    let outputs = collect_tool_outputs(&bodies)?;
+
+    let start_output = outputs
+        .get(start_call_id)
+        .expect("missing start output for exec_command");
+    let session_id = start_output
+        .get("session_id")
+        .and_then(Value::as_i64)
+        .expect("expected session id from exec_command");
+    assert!(
+        session_id >= 0,
+        "session_id should be non-negative, got {session_id}"
+    );
+    assert!(
+        start_output.get("exit_code").is_none(),
+        "initial exec_command should not include exit_code while session is running"
+    );
+
+    let send_output = outputs
+        .get(send_call_id)
+        .expect("missing write_stdin echo output");
+    let echoed = send_output
+        .get("output")
+        .and_then(Value::as_str)
+        .unwrap_or_default();
+    assert!(
+        echoed.contains("hello unified exec"),
+        "expected echoed output from cat, got {echoed:?}"
+    );
+    let echoed_session = send_output
+        .get("session_id")
+        .and_then(Value::as_i64)
+        .expect("write_stdin should return session id while process is running");
+    assert_eq!(
+        echoed_session, session_id,
+        "write_stdin should reuse existing session id"
+    );
+    assert!(
+        send_output.get("exit_code").is_none(),
+        "write_stdin should not include exit_code while process is running"
+    );
+
+    let exit_output = outputs
+        .get(exit_call_id)
+        .expect("missing exit metadata output");
+    assert!(
+        exit_output.get("session_id").is_none(),
+        "session_id should be omitted once the process exits"
+    );
+    let exit_code = exit_output
+        .get("exit_code")
+        .and_then(Value::as_i64)
+        .expect("expected exit_code after sending EOF");
+    assert_eq!(exit_code, 0, "cat should exit cleanly after EOF");
+
+    let exit_chunk = exit_output
+        .get("chunk_id")
+        .and_then(Value::as_str)
+        .expect("missing chunk id for exit output");
+    assert!(
+        exit_chunk.chars().all(|c| c.is_ascii_hexdigit()),
+        "chunk id should be hexadecimal: {exit_chunk}"
+    );

    Ok(())
 }
@@ -255,15 +535,15 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {

    let first_call_id = "uexec-start";
    let first_args = serde_json::json!({
-        "input": ["/bin/cat"],
-        "timeout_ms": 200,
+        "cmd": "/bin/cat",
+        "yield_time_ms": 200,
    });

    let second_call_id = "uexec-stdin";
    let second_args = serde_json::json!({
-        "input": ["hello unified exec\n"],
-        "session_id": "0",
-        "timeout_ms": 500,
+        "chars": "hello unified exec\n",
+        "session_id": 0,
+        "yield_time_ms": 500,
    });

    let responses = vec![
@@ -271,7 +551,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
            ev_response_created("resp-1"),
            ev_function_call(
                first_call_id,
-                "unified_exec",
+                "exec_command",
                &serde_json::to_string(&first_args)?,
            ),
            ev_completed("resp-1"),
@@ -280,7 +560,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
            ev_response_created("resp-2"),
            ev_function_call(
                second_call_id,
-                "unified_exec",
+                "write_stdin",
                &serde_json::to_string(&second_args)?,
            ),
            ev_completed("resp-2"),
@@ -324,9 +604,9 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
    let start_output = outputs
        .get(first_call_id)
        .expect("missing first unified_exec output");
-    let session_id = start_output["session_id"].as_str().unwrap_or_default();
+    let session_id = start_output["session_id"].as_i64().unwrap_or_default();
    assert!(
-        !session_id.is_empty(),
+        session_id >= 0,
        "expected session id in first unified_exec response"
    );
    assert!(
@@ -340,7 +620,7 @@ async fn unified_exec_reuses_session_via_stdin() -> Result<()> {
        .get(second_call_id)
        .expect("missing reused unified_exec output");
    assert_eq!(
-        reuse_output["session_id"].as_str().unwrap_or_default(),
+        reuse_output["session_id"].as_i64().unwrap_or_default(),
        session_id
    );
    let echoed = reuse_output["output"].as_str().unwrap_or_default();
@@ -391,15 +671,15 @@ PY

    let first_call_id = "uexec-lag-start";
    let first_args = serde_json::json!({
-        "input": ["/bin/sh", "-c", script],
-        "timeout_ms": 25,
+        "cmd": script,
+        "yield_time_ms": 25,
    });

    let second_call_id = "uexec-lag-poll";
    let second_args = serde_json::json!({
-        "input": Vec::<String>::new(),
-        "session_id": "0",
-        "timeout_ms": 2_000,
+        "chars": "",
+        "session_id": 0,
+        "yield_time_ms": 2_000,
    });

    let responses = vec![
@@ -407,7 +687,7 @@ PY
            ev_response_created("resp-1"),
            ev_function_call(
                first_call_id,
-                "unified_exec",
+                "exec_command",
                &serde_json::to_string(&first_args)?,
            ),
            ev_completed("resp-1"),
@@ -416,7 +696,7 @@ PY
            ev_response_created("resp-2"),
            ev_function_call(
                second_call_id,
-                "unified_exec",
+                "write_stdin",
                &serde_json::to_string(&second_args)?,
            ),
            ev_completed("resp-2"),
@@ -460,9 +740,9 @@ PY
    let start_output = outputs
        .get(first_call_id)
        .expect("missing initial unified_exec output");
-    let session_id = start_output["session_id"].as_str().unwrap_or_default();
+    let session_id = start_output["session_id"].as_i64().unwrap_or_default();
    assert!(
-        !session_id.is_empty(),
+        session_id >= 0,
        "expected session id from initial unified_exec response"
    );

@@ -497,15 +777,15 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {

    let first_call_id = "uexec-timeout";
    let first_args = serde_json::json!({
-        "input": ["/bin/sh", "-c", "sleep 0.1; echo ready"],
-        "timeout_ms": 10,
+        "cmd": "sleep 0.5; echo ready",
+        "yield_time_ms": 10,
    });

    let second_call_id = "uexec-poll";
    let second_args = serde_json::json!({
-        "input": Vec::<String>::new(),
-        "session_id": "0",
-        "timeout_ms": 800,
+        "chars": "",
+        "session_id": 0,
+        "yield_time_ms": 800,
    });

    let responses = vec![
@@ -513,7 +793,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
            ev_response_created("resp-1"),
            ev_function_call(
                first_call_id,
-                "unified_exec",
+                "exec_command",
                &serde_json::to_string(&first_args)?,
            ),
            ev_completed("resp-1"),
@@ -522,7 +802,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
            ev_response_created("resp-2"),
            ev_function_call(
                second_call_id,
-                "unified_exec",
+                "write_stdin",
                &serde_json::to_string(&second_args)?,
            ),
            ev_completed("resp-2"),
@@ -569,7 +849,7 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
    let outputs = collect_tool_outputs(&bodies)?;

    let first_output = outputs.get(first_call_id).expect("missing timeout output");
-    assert_eq!(first_output["session_id"], "0");
+    assert_eq!(first_output["session_id"], 0);
    assert!(
        first_output["output"]
            .as_str()