fix: apply_patch shell_serialization tests (#4786)

## Summary
Adds additional shell_serialization tests specifically for apply_patch and other cases.

## Test Plan
- [x] These are all tests
2025-10-14 13:00:49 -07:00
parent 13035561cd
commit 0a0a10d8b3
5 changed files with 608 additions and 57 deletions
--- a/codex-rs/core/src/executor/backends.rs
+++ b/codex-rs/core/src/executor/backends.rs
@@ -6,6 +6,7 @@ use async_trait::async_trait;
 use crate::CODEX_APPLY_PATCH_ARG1;
 use crate::apply_patch::ApplyPatchExec;
 use crate::exec::ExecParams;
+use crate::executor::ExecutorConfig;
 use crate::function_tool::FunctionCallError;

 pub(crate) enum ExecutionMode {
@@ -22,6 +23,7 @@ pub(crate) trait ExecutionBackend: Send + Sync {
        params: ExecParams,
        // Required for downcasting the apply_patch.
        mode: &ExecutionMode,
+        config: &ExecutorConfig,
    ) -> Result<ExecParams, FunctionCallError>;

    fn stream_stdout(&self, _mode: &ExecutionMode) -> bool {
@@ -47,6 +49,7 @@ impl ExecutionBackend for ShellBackend {
        &self,
        params: ExecParams,
        mode: &ExecutionMode,
+        _config: &ExecutorConfig,
    ) -> Result<ExecParams, FunctionCallError> {
        match mode {
            ExecutionMode::Shell => Ok(params),
@@ -65,17 +68,22 @@ impl ExecutionBackend for ApplyPatchBackend {
        &self,
        params: ExecParams,
        mode: &ExecutionMode,
+        config: &ExecutorConfig,
    ) -> Result<ExecParams, FunctionCallError> {
        match mode {
            ExecutionMode::ApplyPatch(exec) => {
-                let path_to_codex = env::current_exe()
-                    .ok()
-                    .map(|p| p.to_string_lossy().to_string())
-                    .ok_or_else(|| {
-                        FunctionCallError::RespondToModel(
-                            "failed to determine path to codex executable".to_string(),
-                        )
-                    })?;
+                let path_to_codex = if let Some(exe_path) = &config.codex_exe {
+                    exe_path.to_string_lossy().to_string()
+                } else {
+                    env::current_exe()
+                        .ok()
+                        .map(|p| p.to_string_lossy().to_string())
+                        .ok_or_else(|| {
+                            FunctionCallError::RespondToModel(
+                                "failed to determine path to codex executable".to_string(),
+                            )
+                        })?
+                };

                let patch = exec.action.patch.clone();
                Ok(ExecParams {
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -30,19 +30,19 @@ use codex_otel::otel_event_manager::ToolDecisionSource;
 pub(crate) struct ExecutorConfig {
    pub(crate) sandbox_policy: SandboxPolicy,
    pub(crate) sandbox_cwd: PathBuf,
-    codex_linux_sandbox_exe: Option<PathBuf>,
+    pub(crate) codex_exe: Option<PathBuf>,
 }

 impl ExecutorConfig {
    pub(crate) fn new(
        sandbox_policy: SandboxPolicy,
        sandbox_cwd: PathBuf,
-        codex_linux_sandbox_exe: Option<PathBuf>,
+        codex_exe: Option<PathBuf>,
    ) -> Self {
        Self {
            sandbox_policy,
            sandbox_cwd,
-            codex_linux_sandbox_exe,
+            codex_exe,
        }
    }
 }
@@ -86,7 +86,14 @@ impl Executor {
                maybe_translate_shell_command(request.params, session, request.use_shell_profile);
        }

-        // Step 1: Normalise parameters via the selected backend.
+        // Step 1: Snapshot sandbox configuration so it stays stable for this run.
+        let config = self
+            .config
+            .read()
+            .map_err(|_| ExecError::rejection("executor config poisoned"))?
+            .clone();
+
+        // Step 2: Normalise parameters via the selected backend.
        let backend = backend_for_mode(&request.mode);
        let stdout_stream = if backend.stream_stdout(&request.mode) {
            request.stdout_stream.clone()
@@ -94,16 +101,9 @@ impl Executor {
            None
        };
        request.params = backend
-            .prepare(request.params, &request.mode)
+            .prepare(request.params, &request.mode, &config)
            .map_err(ExecError::from)?;

-        // Step 2: Snapshot sandbox configuration so it stays stable for this run.
-        let config = self
-            .config
-            .read()
-            .map_err(|_| ExecError::rejection("executor config poisoned"))?
-            .clone();
-
        // Step 3: Decide sandbox placement, prompting for approval when needed.
        let sandbox_decision = select_sandbox(
            &request,
@@ -227,7 +227,7 @@ impl Executor {
            sandbox,
            &config.sandbox_policy,
            &config.sandbox_cwd,
-            &config.codex_linux_sandbox_exe,
+            &config.codex_exe,
            stdout_stream,
        )
        .await
--- a/codex-rs/core/tests/common/test_codex.rs
+++ b/codex-rs/core/tests/common/test_codex.rs
@@ -1,4 +1,5 @@
 use std::mem::swap;
+use std::path::PathBuf;
 use std::sync::Arc;

 use codex_core::CodexAuth;
@@ -39,6 +40,12 @@ impl TestCodexBuilder {
        let mut config = load_default_config_for_test(&home);
        config.cwd = cwd.path().to_path_buf();
        config.model_provider = model_provider;
+        config.codex_linux_sandbox_exe = Some(PathBuf::from(
+            assert_cmd::Command::cargo_bin("codex")?
+                .get_program()
+                .to_os_string(),
+        ));
+
        let mut mutators = vec![];
        swap(&mut self.config_mutators, &mut mutators);

--- a/codex-rs/core/tests/suite/shell_serialization.rs
+++ b/codex-rs/core/tests/suite/shell_serialization.rs
@@ -10,9 +10,12 @@ use codex_core::protocol::Op;
 use codex_core::protocol::SandboxPolicy;
 use codex_protocol::config_types::ReasoningSummary;
 use core_test_support::assert_regex_match;
+use core_test_support::responses::ev_apply_patch_function_call;
 use core_test_support::responses::ev_assistant_message;
 use core_test_support::responses::ev_completed;
+use core_test_support::responses::ev_custom_tool_call;
 use core_test_support::responses::ev_function_call;
+use core_test_support::responses::ev_local_shell_call;
 use core_test_support::responses::ev_response_created;
 use core_test_support::responses::mount_sse_sequence;
 use core_test_support::responses::sse;
@@ -21,8 +24,11 @@ use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::TestCodex;
 use core_test_support::test_codex::test_codex;
 use core_test_support::wait_for_event;
+use pretty_assertions::assert_eq;
+use regex_lite::Regex;
 use serde_json::Value;
 use serde_json::json;
+use std::fs;

 async fn submit_turn(test: &TestCodex, prompt: &str, sandbox_policy: SandboxPolicy) -> Result<()> {
    let session_model = test.session_configured.model.clone();
@@ -72,6 +78,21 @@ fn find_function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> Option<&
    None
 }

+fn find_custom_tool_call_output<'a>(bodies: &'a [Value], call_id: &str) -> Option<&'a Value> {
+    for body in bodies {
+        if let Some(items) = body.get("input").and_then(Value::as_array) {
+            for item in items {
+                if item.get("type").and_then(Value::as_str) == Some("custom_tool_call_output")
+                    && item.get("call_id").and_then(Value::as_str) == Some(call_id)
+                {
+                    return Some(item);
+                }
+            }
+        }
+    }
+    None
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
    skip_if_no_network!(Ok(()));
@@ -120,7 +141,12 @@ async fn shell_output_stays_json_without_freeform_apply_patch() -> Result<()> {
        .and_then(Value::as_str)
        .expect("shell output string");

-    let parsed: Value = serde_json::from_str(output)?;
+    let mut parsed: Value = serde_json::from_str(output)?;
+    if let Some(metadata) = parsed.get_mut("metadata").and_then(Value::as_object_mut) {
+        // duration_seconds is non-deterministic; remove it for deep equality
+        let _ = metadata.remove("duration_seconds");
+    }
+
    assert_eq!(
        parsed
            .get("metadata")
@@ -199,6 +225,83 @@ freeform shell
    Ok(())
 }

+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn shell_output_for_freeform_tool_records_duration() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.include_apply_patch_tool = true;
+    });
+    let test = builder.build(&server).await?;
+
+    #[cfg(target_os = "linux")]
+    let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
+
+    #[cfg(target_os = "macos")]
+    let sleep_cmd = vec!["/bin/bash", "-c", "sleep 1"];
+
+    #[cfg(windows)]
+    let sleep_cmd = "timeout 1";
+
+    let call_id = "shell-structured";
+    let args = json!({
+        "command": sleep_cmd,
+        "timeout_ms": 2_000,
+    });
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "run the structured shell command",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item =
+        find_function_call_output(&bodies, call_id).expect("structured output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("structured output string");
+
+    let expected_pattern = r#"(?s)^Exit code: 0
+Wall time: [0-9]+(?:\.[0-9]+)? seconds
+Output:
+$"#;
+    assert_regex_match(expected_pattern, output);
+
+    let wall_time_regex = Regex::new(r"(?m)^Wall (?:time|Clock): ([0-9]+(?:\.[0-9]+)?) seconds$")
+        .expect("compile wall time regex");
+    let wall_time_seconds = wall_time_regex
+        .captures(output)
+        .and_then(|caps| caps.get(1))
+        .and_then(|value| value.as_str().parse::<f32>().ok())
+        .expect("expected structured shell output to contain wall time seconds");
+    assert!(
+        wall_time_seconds > 0.5,
+        "expected wall time to be greater than 0.5 seconds, got {wall_time_seconds}"
+    );
+
+    Ok(())
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn shell_output_reserializes_truncated_content() -> Result<()> {
    skip_if_no_network!(Ok(()));
@@ -276,3 +379,446 @@ $"#;

    Ok(())
 }
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn apply_patch_custom_tool_output_is_structured() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.include_apply_patch_tool = true;
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "apply-patch-structured";
+    let file_name = "structured.txt";
+    let patch = format!(
+        r#"*** Begin Patch
+*** Add File: {file_name}
+from custom tool
+*** End Patch
+"#
+    );
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_custom_tool_call(call_id, "apply_patch", &patch),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "apply the patch via custom tool",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item =
+        find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("apply_patch output string");
+
+    let expected_pattern = format!(
+        r"(?s)^Exit code: 0
+Wall time: [0-9]+(?:\.[0-9]+)? seconds
+Output:
+Success. Updated the following files:
+A {file_name}
+?$"
+    );
+    assert_regex_match(&expected_pattern, output);
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn apply_patch_custom_tool_call_creates_file() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.include_apply_patch_tool = true;
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "apply-patch-add-file";
+    let file_name = "custom_tool_apply_patch.txt";
+    let patch = format!(
+        "*** Begin Patch\n*** Add File: {file_name}\n+custom tool content\n*** End Patch\n"
+    );
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_custom_tool_call(call_id, "apply_patch", &patch),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "apply_patch done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "apply the patch via custom tool to create a file",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item =
+        find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("apply_patch output string");
+
+    let expected_pattern = format!(
+        r"(?s)^Exit code: 0
+Wall time: [0-9]+(?:\.[0-9]+)? seconds
+Output:
+Success. Updated the following files:
+A {file_name}
+?$"
+    );
+    assert_regex_match(&expected_pattern, output);
+
+    let new_file_path = test.cwd.path().join(file_name);
+    let created_contents = fs::read_to_string(&new_file_path)?;
+    assert_eq!(
+        created_contents, "custom tool content\n",
+        "expected file contents for {file_name}"
+    );
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn apply_patch_custom_tool_call_updates_existing_file() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.include_apply_patch_tool = true;
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "apply-patch-update-file";
+    let file_name = "custom_tool_apply_patch_existing.txt";
+    let file_path = test.cwd.path().join(file_name);
+    fs::write(&file_path, "before\n")?;
+    let patch = format!(
+        "*** Begin Patch\n*** Update File: {file_name}\n@@\n-before\n+after\n*** End Patch\n"
+    );
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_custom_tool_call(call_id, "apply_patch", &patch),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "apply_patch update done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "apply the patch via custom tool to update a file",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item =
+        find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("apply_patch output string");
+
+    let expected_pattern = format!(
+        r"(?s)^Exit code: 0
+Wall time: [0-9]+(?:\.[0-9]+)? seconds
+Output:
+Success. Updated the following files:
+M {file_name}
+?$"
+    );
+    assert_regex_match(&expected_pattern, output);
+
+    let updated_contents = fs::read_to_string(file_path)?;
+    assert_eq!(updated_contents, "after\n", "expected updated file content");
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn apply_patch_custom_tool_call_reports_failure_output() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.include_apply_patch_tool = true;
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "apply-patch-failure";
+    let missing_file = "missing_custom_tool_apply_patch.txt";
+    let patch = format!(
+        "*** Begin Patch\n*** Update File: {missing_file}\n@@\n-before\n+after\n*** End Patch\n"
+    );
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_custom_tool_call(call_id, "apply_patch", &patch),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "apply_patch failure done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "attempt a failing apply_patch via custom tool",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item =
+        find_custom_tool_call_output(&bodies, call_id).expect("apply_patch output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("apply_patch output string");
+
+    let expected_output = format!(
+        "apply_patch verification failed: Failed to read file to update {}/{missing_file}: No such file or directory (os error 2)",
+        test.cwd.path().to_string_lossy()
+    );
+    assert_eq!(output, expected_output);
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn apply_patch_function_call_output_is_structured() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.include_apply_patch_tool = true;
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "apply-patch-function";
+    let file_name = "function_apply_patch.txt";
+    let patch =
+        format!("*** Begin Patch\n*** Add File: {file_name}\n+via function call\n*** End Patch\n");
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_apply_patch_function_call(call_id, &patch),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "apply_patch function done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "apply the patch via function-call apply_patch",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item =
+        find_function_call_output(&bodies, call_id).expect("apply_patch function output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("apply_patch output string");
+
+    let expected_pattern = format!(
+        r"(?s)^Exit code: 0
+Wall time: [0-9]+(?:\.[0-9]+)? seconds
+Output:
+Success. Updated the following files:
+A {file_name}
+?$"
+    );
+    assert_regex_match(&expected_pattern, output);
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn shell_output_is_structured_for_nonzero_exit() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5-codex".to_string();
+        config.model_family =
+            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
+        config.include_apply_patch_tool = true;
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "shell-nonzero-exit";
+    let args = json!({
+        "command": ["/bin/sh", "-c", "exit 42"],
+        "timeout_ms": 1_000,
+    });
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "shell failure handled"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "run the failing shell command",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item = find_function_call_output(&bodies, call_id).expect("shell output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("shell output string");
+
+    let expected_output = r"Exit code: 42
+Wall time: 0 seconds
+Output:
+";
+    assert_eq!(output, expected_output);
+    assert!(
+        serde_json::from_str::<Value>(output).is_err(),
+        "expected structured shell output to be plain text",
+    );
+    assert!(
+        output.starts_with("Exit code: 42\n"),
+        "expected non-zero exit code prefix, got {output:?}",
+    );
+    assert!(
+        output.contains("\nOutput:\n"),
+        "expected Output section, got {output:?}",
+    );
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn local_shell_call_output_is_structured() -> Result<()> {
+    skip_if_no_network!(Ok(()));
+
+    let server = start_mock_server().await;
+    let mut builder = test_codex().with_config(|config| {
+        config.model = "gpt-5-codex".to_string();
+        config.model_family =
+            find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
+        config.include_apply_patch_tool = true;
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "local-shell-call";
+    let responses = vec![
+        sse(vec![
+            json!({"type": "response.created", "response": {"id": "resp-1"}}),
+            ev_local_shell_call(call_id, "completed", vec!["/bin/echo", "local shell"]),
+            ev_completed("resp-1"),
+        ]),
+        sse(vec![
+            ev_assistant_message("msg-1", "local shell done"),
+            ev_completed("resp-2"),
+        ]),
+    ];
+    mount_sse_sequence(&server, responses).await;
+
+    submit_turn(
+        &test,
+        "run the local shell command",
+        SandboxPolicy::DangerFullAccess,
+    )
+    .await?;
+
+    let requests = server
+        .received_requests()
+        .await
+        .expect("recorded requests present");
+    let bodies = request_bodies(&requests)?;
+    let output_item =
+        find_function_call_output(&bodies, call_id).expect("local shell output present");
+    let output = output_item
+        .get("output")
+        .and_then(Value::as_str)
+        .expect("local shell output string");
+
+    assert!(
+        serde_json::from_str::<Value>(output).is_err(),
+        "expected structured local shell output to be plain text",
+    );
+    assert!(
+        output.starts_with("Exit code: 0\n"),
+        "expected zero exit code prefix, got {output:?}",
+    );
+    assert!(
+        output.contains("local shell"),
+        "expected command stdout to be present, got {output:?}",
+    );
+
+    Ok(())
+}
--- a/codex-rs/core/tests/suite/tool_harness.rs
+++ b/codex-rs/core/tests/suite/tool_harness.rs
@@ -1,5 +1,7 @@
 #![cfg(not(target_os = "windows"))]

+use std::fs;
+
 use assert_matches::assert_matches;
 use codex_core::features::Feature;
 use codex_core::model_family::find_family_for_model;
@@ -295,15 +297,19 @@ async fn apply_patch_tool_executes_and_emits_patch_events() -> anyhow::Result<()
        ..
    } = builder.build(&server).await?;

+    let file_name = "notes.txt";
+    let file_path = cwd.path().join(file_name);
    let call_id = "apply-patch-call";
-    let patch_content = r#"*** Begin Patch
-*** Add File: notes.txt
+    let patch_content = format!(
+        r#"*** Begin Patch
+*** Add File: {file_name}
 +Tool harness apply patch
-*** End Patch"#;
+*** End Patch"#
+    );

    let first_response = sse(vec![
        ev_response_created("resp-1"),
-        ev_apply_patch_function_call(call_id, patch_content),
+        ev_apply_patch_function_call(call_id, &patch_content),
        ev_completed("resp-1"),
    ]);
    responses::mount_sse_once_match(&server, any(), first_response).await;
@@ -352,6 +358,7 @@ async fn apply_patch_tool_executes_and_emits_patch_events() -> anyhow::Result<()
    assert!(saw_patch_begin, "expected PatchApplyBegin event");
    let patch_end_success =
        patch_end_success.expect("expected PatchApplyEnd event to capture success flag");
+    assert!(patch_end_success);

    let req = second_mock.single_request();
    let output_item = req.function_call_output(call_id);
@@ -361,38 +368,21 @@ async fn apply_patch_tool_executes_and_emits_patch_events() -> anyhow::Result<()
    );
    let output_text = extract_output_text(&output_item).expect("output text present");

-    if let Ok(exec_output) = serde_json::from_str::<Value>(output_text) {
-        let exit_code = exec_output["metadata"]["exit_code"]
-            .as_i64()
-            .expect("exit_code present");
-        let summary = exec_output["output"].as_str().expect("output field");
-        assert_eq!(
-            exit_code, 0,
-            "expected apply_patch exit_code=0, got {exit_code}, summary: {summary:?}"
-        );
-        assert!(
-            patch_end_success,
-            "expected PatchApplyEnd success flag, summary: {summary:?}"
-        );
-        assert!(
-            summary.contains("Success."),
-            "expected apply_patch summary to note success, got {summary:?}"
-        );
+    let expected_pattern = format!(
+        r"(?s)^Exit code: 0
+Wall time: [0-9]+(?:\.[0-9]+)? seconds
+Output:
+Success. Updated the following files:
+A {file_name}
+?$"
+    );
+    assert_regex_match(&expected_pattern, output_text);

-        let patched_path = cwd.path().join("notes.txt");
-        let contents = std::fs::read_to_string(&patched_path)
-            .unwrap_or_else(|e| panic!("failed reading {}: {e}", patched_path.display()));
-        assert_eq!(contents, "Tool harness apply patch\n");
-    } else {
-        assert!(
-            output_text.contains("codex-run-as-apply-patch"),
-            "expected apply_patch failure message to mention codex-run-as-apply-patch, got {output_text:?}"
-        );
-        assert!(
-            !patch_end_success,
-            "expected PatchApplyEnd to report success=false when apply_patch invocation fails"
-        );
-    }
+    let updated_contents = fs::read_to_string(file_path)?;
+    assert_eq!(
+        updated_contents, "Tool harness apply patch\n",
+        "expected updated file content"
+    );

    Ok(())
 }