codex-rs/exec/tests/apply_patch.rs

#![allow(clippy::expect_used, clippy::unwrap_used)]

use anyhow::Context;
use assert_cmd::prelude::*;
use codex_core::CODEX_APPLY_PATCH_ARG1;
use std::fs;
use std::process::Command;
use tempfile::tempdir;

/// While we may add an `apply-patch` subcommand to the `codex` CLI multitool
/// at some point, we must ensure that the smaller `codex-exec` CLI can still
/// emulate the `apply_patch` CLI.
#[test]
fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
    let tmp = tempdir()?;
    let relative_path = "source.txt";
    let absolute_path = tmp.path().join(relative_path);
    fs::write(&absolute_path, "original content\n")?;

    Command::cargo_bin("codex-exec")
        .context("should find binary for codex-exec")?
        .arg(CODEX_APPLY_PATCH_ARG1)
        .arg(
            r#"*** Begin Patch
*** Update File: source.txt
@@
-original content
+modified by apply_patch
*** End Patch"#,
        )
        .current_dir(tmp.path())
        .assert()
        .success()
        .stdout("Success. Updated the following files:\nM source.txt\n")
        .stderr(predicates::str::is_empty());
    assert_eq!(
        fs::read_to_string(absolute_path)?,
        "modified by apply_patch\n"
    );
    Ok(())
}

#[cfg(not(target_os = "windows"))]
#[tokio::test]
async fn test_apply_patch_tool() -> anyhow::Result<()> {
    use core_test_support::load_sse_fixture_with_id_from_str;
    use tempfile::TempDir;
    use wiremock::Mock;
    use wiremock::MockServer;
    use wiremock::ResponseTemplate;
    use wiremock::matchers::method;
    use wiremock::matchers::path;

    const SSE_TOOL_CALL_ADD: &str = r#"[
  {
    "type": "response.output_item.done",
    "item": {
      "type": "function_call",
      "name": "apply_patch",
      "arguments": "{\n  \"input\": \"*** Begin Patch\\n*** Add File: test.md\\n+Hello world\\n*** End Patch\"\n}",
      "call_id": "__ID__"
    }
  },
  {
    "type": "response.completed",
    "response": {
      "id": "__ID__",
      "usage": {
        "input_tokens": 0,
        "input_tokens_details": null,
        "output_tokens": 0,
        "output_tokens_details": null,
        "total_tokens": 0
      },
      "output": []
    }
  }
]"#;

    const SSE_TOOL_CALL_UPDATE: &str = r#"[
  {
    "type": "response.output_item.done",
    "item": {
      "type": "function_call",
      "name": "apply_patch",
      "arguments": "{\n  \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
      "call_id": "__ID__"
    }
  },
  {
    "type": "response.completed",
    "response": {
      "id": "__ID__",
      "usage": {
        "input_tokens": 0,
        "input_tokens_details": null,
        "output_tokens": 0,
        "output_tokens_details": null,
        "total_tokens": 0
      },
      "output": []
    }
  }
]"#;

    const SSE_TOOL_CALL_COMPLETED: &str = r#"[
  {
    "type": "response.completed",
    "response": {
      "id": "__ID__",
      "usage": {
        "input_tokens": 0,
        "input_tokens_details": null,
        "output_tokens": 0,
        "output_tokens_details": null,
        "total_tokens": 0
      },
      "output": []
    }
  }
]"#;

    // Start a mock model server
    let server = MockServer::start().await;

    // First response: model calls apply_patch to create test.md
    let first = ResponseTemplate::new(200)
        .insert_header("content-type", "text/event-stream")
        .set_body_raw(
            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
            "text/event-stream",
        );

    Mock::given(method("POST"))
        // .and(path("/v1/responses"))
        .respond_with(first)
        .up_to_n_times(1)
        .mount(&server)
        .await;

    // Second response: model calls apply_patch to update test.md
    let second = ResponseTemplate::new(200)
        .insert_header("content-type", "text/event-stream")
        .set_body_raw(
            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
            "text/event-stream",
        );

    Mock::given(method("POST"))
        .and(path("/v1/responses"))
        .respond_with(second)
        .up_to_n_times(1)
        .mount(&server)
        .await;

    let final_completed = ResponseTemplate::new(200)
        .insert_header("content-type", "text/event-stream")
        .set_body_raw(
            load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
            "text/event-stream",
        );

    Mock::given(method("POST"))
        // .and(path("/v1/responses"))
        .respond_with(final_completed)
        .expect(1)
        .mount(&server)
        .await;

    let tmp_cwd = TempDir::new().unwrap();
    Command::cargo_bin("codex-exec")
        .context("should find binary for codex-exec")?
        .current_dir(tmp_cwd.path())
        .env("CODEX_HOME", tmp_cwd.path())
        .env("OPENAI_API_KEY", "dummy")
        .env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
        .arg("--skip-git-repo-check")
        .arg("-s")
        .arg("workspace-write")
        .arg("foo")
        .assert()
        .success();

    // Verify final file contents
    let final_path = tmp_cwd.path().join("test.md");
    let contents = std::fs::read_to_string(&final_path)
        .unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
    assert_eq!(contents, "Final text\n");
    Ok(())
}
[tools] Add apply_patch tool (#2303) ## Summary We've been seeing a number of issues and reports with our synthetic `apply_patch` tool, e.g. #802. Let's make this a real tool - in my anecdotal testing, it's critical for GPT-OSS models, but I'd like to make it the standard across GPT-5 and codex models as well. ## Testing - [x] Tested locally - [x] Integration test 2025-08-15 11:55:53 -04:00			`#![allow(clippy::expect_used, clippy::unwrap_used)]`

fix: support special --codex-run-as-apply-patch arg (#1702) This introduces some special behavior to the CLIs that are using the `codex-arg0` crate where if `arg1` is `--codex-run-as-apply-patch`, then it will run as if `apply_patch arg2` were invoked. This is important because it means we can do things like: ``` SANDBOX_TYPE=landlock # or seatbelt for macOS codex debug "${SANDBOX_TYPE}" -- codex --codex-run-as-apply-patch PATCH ``` which gives us a way to run `apply_patch` while ensuring it adheres to the sandbox the user specified. While it would be nice to use the `arg0` trick like we are currently doing for `codex-linux-sandbox`, there is no way to specify the `arg0` for the underlying command when running under `/usr/bin/sandbox-exec`, so it will not work for us in this case. Admittedly, we could have also supported this via a custom environment variable (e.g., `CODEX_ARG0`), but since environment variables are inherited by child processes, that seemed like a potentially leakier abstraction. This change, as well as our existing reliance on checking `arg0`, place additional requirements on those who include `codex-core`. Its `README.md` has been updated to reflect this. While we could have just added an `apply-patch` subcommand to the `codex` multitool CLI, that would not be sufficient for the standalone `codex-exec` CLI, which is something that we distribute as part of our GitHub releases for those who know they will not be using the TUI and therefore prefer to use a slightly smaller executable: https://github.com/openai/codex/releases/tag/rust-v0.10.0 To that end, this PR adds an integration test to ensure that the `--codex-run-as-apply-patch` option works with the standalone `codex-exec` CLI. --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1702). * #1705 * #1703 * __->__ #1702 * #1698 * #1697 2025-07-28 09:26:44 -07:00			`use anyhow::Context;`
			`use assert_cmd::prelude::*;`
fix: run apply_patch calls through the sandbox (#1705) Building on the work of https://github.com/openai/codex/pull/1702, this changes how a shell call to `apply_patch` is handled. Previously, a shell call to `apply_patch` was always handled in-process, never leveraging a sandbox. To determine whether the `apply_patch` operation could be auto-approved, the `is_write_patch_constrained_to_writable_paths()` function would check if all the paths listed in the paths were writable. If so, the agent would apply the changes listed in the patch. Unfortunately, this approach afforded a loophole: symlinks! * For a soft link, we could fix this issue by tracing the link and checking whether the target is in the set of writable paths, however... * ...For a hard link, things are not as simple. We can run `stat FILE` to see if the number of links is greater than 1, but then we would have to do something potentially expensive like `find . -inum <inode_number>` to find the other paths for `FILE`. Further, even if this worked, this approach runs the risk of a [TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use) race condition, so it is not robust. The solution, implemented in this PR, is to take the virtual execution of the `apply_patch` CLI into an _actual_ execution using `codex --codex-run-as-apply-patch PATCH`, which we can run under the sandbox the user specified, just like any other `shell` call. This, of course, assumes that the sandbox prevents writing through symlinks as a mechanism to write to folders that are not in the writable set configured by the sandbox. I verified this by testing the following on both Mac and Linux: ```shell #!/usr/bin/env bash set -euo pipefail # Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR? # Codex is run in SANDBOX_DIR, so writes should be constrianed to this directory. SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX) # EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it. EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX) echo "SANDBOX_DIR: $SANDBOX_DIR" echo "EXPLOIT_DIR: $EXPLOIT_DIR" cleanup() { # Only remove if it looks sane and still exists [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR" [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR" } trap cleanup EXIT echo "I am the original content" > "${EXPLOIT_DIR}/original.txt" # Drop the -s to test hard links. ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt" cat "${SANDBOX_DIR}/link-to-original.txt" if [[ "$(uname)" == "Linux" ]]; then SANDBOX_SUBCOMMAND=landlock else SANDBOX_SUBCOMMAND=seatbelt fi # Attempt the exploit cd "${SANDBOX_DIR}" codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" \|\| true cat "${EXPLOIT_DIR}/original.txt" ``` Admittedly, this change merits a proper integration test, but I think I will have to do that in a follow-up PR. 2025-07-30 16:45:08 -07:00			`use codex_core::CODEX_APPLY_PATCH_ARG1;`
fix: support special --codex-run-as-apply-patch arg (#1702) This introduces some special behavior to the CLIs that are using the `codex-arg0` crate where if `arg1` is `--codex-run-as-apply-patch`, then it will run as if `apply_patch arg2` were invoked. This is important because it means we can do things like: ``` SANDBOX_TYPE=landlock # or seatbelt for macOS codex debug "${SANDBOX_TYPE}" -- codex --codex-run-as-apply-patch PATCH ``` which gives us a way to run `apply_patch` while ensuring it adheres to the sandbox the user specified. While it would be nice to use the `arg0` trick like we are currently doing for `codex-linux-sandbox`, there is no way to specify the `arg0` for the underlying command when running under `/usr/bin/sandbox-exec`, so it will not work for us in this case. Admittedly, we could have also supported this via a custom environment variable (e.g., `CODEX_ARG0`), but since environment variables are inherited by child processes, that seemed like a potentially leakier abstraction. This change, as well as our existing reliance on checking `arg0`, place additional requirements on those who include `codex-core`. Its `README.md` has been updated to reflect this. While we could have just added an `apply-patch` subcommand to the `codex` multitool CLI, that would not be sufficient for the standalone `codex-exec` CLI, which is something that we distribute as part of our GitHub releases for those who know they will not be using the TUI and therefore prefer to use a slightly smaller executable: https://github.com/openai/codex/releases/tag/rust-v0.10.0 To that end, this PR adds an integration test to ensure that the `--codex-run-as-apply-patch` option works with the standalone `codex-exec` CLI. --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1702). * #1705 * #1703 * __->__ #1702 * #1698 * #1697 2025-07-28 09:26:44 -07:00			`use std::fs;`
			`use std::process::Command;`
			`use tempfile::tempdir;`

			/// While we may add an `apply-patch` subcommand to the `codex` CLI multitool
			/// at some point, we must ensure that the smaller `codex-exec` CLI can still
			/// emulate the `apply_patch` CLI.
			`#[test]`
			`fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {`
			`let tmp = tempdir()?;`
			`let relative_path = "source.txt";`
			`let absolute_path = tmp.path().join(relative_path);`
			`fs::write(&absolute_path, "original content\n")?;`

			`Command::cargo_bin("codex-exec")`
			`.context("should find binary for codex-exec")?`
fix: run apply_patch calls through the sandbox (#1705) Building on the work of https://github.com/openai/codex/pull/1702, this changes how a shell call to `apply_patch` is handled. Previously, a shell call to `apply_patch` was always handled in-process, never leveraging a sandbox. To determine whether the `apply_patch` operation could be auto-approved, the `is_write_patch_constrained_to_writable_paths()` function would check if all the paths listed in the paths were writable. If so, the agent would apply the changes listed in the patch. Unfortunately, this approach afforded a loophole: symlinks! * For a soft link, we could fix this issue by tracing the link and checking whether the target is in the set of writable paths, however... * ...For a hard link, things are not as simple. We can run `stat FILE` to see if the number of links is greater than 1, but then we would have to do something potentially expensive like `find . -inum <inode_number>` to find the other paths for `FILE`. Further, even if this worked, this approach runs the risk of a [TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use) race condition, so it is not robust. The solution, implemented in this PR, is to take the virtual execution of the `apply_patch` CLI into an _actual_ execution using `codex --codex-run-as-apply-patch PATCH`, which we can run under the sandbox the user specified, just like any other `shell` call. This, of course, assumes that the sandbox prevents writing through symlinks as a mechanism to write to folders that are not in the writable set configured by the sandbox. I verified this by testing the following on both Mac and Linux: ```shell #!/usr/bin/env bash set -euo pipefail # Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR? # Codex is run in SANDBOX_DIR, so writes should be constrianed to this directory. SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX) # EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it. EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX) echo "SANDBOX_DIR: $SANDBOX_DIR" echo "EXPLOIT_DIR: $EXPLOIT_DIR" cleanup() { # Only remove if it looks sane and still exists [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR" [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR" } trap cleanup EXIT echo "I am the original content" > "${EXPLOIT_DIR}/original.txt" # Drop the -s to test hard links. ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt" cat "${SANDBOX_DIR}/link-to-original.txt" if [[ "$(uname)" == "Linux" ]]; then SANDBOX_SUBCOMMAND=landlock else SANDBOX_SUBCOMMAND=seatbelt fi # Attempt the exploit cd "${SANDBOX_DIR}" codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" \|\| true cat "${EXPLOIT_DIR}/original.txt" ``` Admittedly, this change merits a proper integration test, but I think I will have to do that in a follow-up PR. 2025-07-30 16:45:08 -07:00			`.arg(CODEX_APPLY_PATCH_ARG1)`
fix: support special --codex-run-as-apply-patch arg (#1702) This introduces some special behavior to the CLIs that are using the `codex-arg0` crate where if `arg1` is `--codex-run-as-apply-patch`, then it will run as if `apply_patch arg2` were invoked. This is important because it means we can do things like: ``` SANDBOX_TYPE=landlock # or seatbelt for macOS codex debug "${SANDBOX_TYPE}" -- codex --codex-run-as-apply-patch PATCH ``` which gives us a way to run `apply_patch` while ensuring it adheres to the sandbox the user specified. While it would be nice to use the `arg0` trick like we are currently doing for `codex-linux-sandbox`, there is no way to specify the `arg0` for the underlying command when running under `/usr/bin/sandbox-exec`, so it will not work for us in this case. Admittedly, we could have also supported this via a custom environment variable (e.g., `CODEX_ARG0`), but since environment variables are inherited by child processes, that seemed like a potentially leakier abstraction. This change, as well as our existing reliance on checking `arg0`, place additional requirements on those who include `codex-core`. Its `README.md` has been updated to reflect this. While we could have just added an `apply-patch` subcommand to the `codex` multitool CLI, that would not be sufficient for the standalone `codex-exec` CLI, which is something that we distribute as part of our GitHub releases for those who know they will not be using the TUI and therefore prefer to use a slightly smaller executable: https://github.com/openai/codex/releases/tag/rust-v0.10.0 To that end, this PR adds an integration test to ensure that the `--codex-run-as-apply-patch` option works with the standalone `codex-exec` CLI. --- [//]: # (BEGIN SAPLING FOOTER) Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1702). * #1705 * #1703 * __->__ #1702 * #1698 * #1697 2025-07-28 09:26:44 -07:00			`.arg(`
			`r#"*** Begin Patch`
			`*** Update File: source.txt`
			`@@`
			`-original content`
			`+modified by apply_patch`
			`*** End Patch"#,`
			`)`
			`.current_dir(tmp.path())`
			`.assert()`
			`.success()`
			`.stdout("Success. Updated the following files:\nM source.txt\n")`
			`.stderr(predicates::str::is_empty());`
			`assert_eq!(`
			`fs::read_to_string(absolute_path)?,`
			`"modified by apply_patch\n"`
			`);`
			`Ok(())`
			`}`
[tools] Add apply_patch tool (#2303) ## Summary We've been seeing a number of issues and reports with our synthetic `apply_patch` tool, e.g. #802. Let's make this a real tool - in my anecdotal testing, it's critical for GPT-OSS models, but I'd like to make it the standard across GPT-5 and codex models as well. ## Testing - [x] Tested locally - [x] Integration test 2025-08-15 11:55:53 -04:00
			`#[cfg(not(target_os = "windows"))]`
			`#[tokio::test]`
			`async fn test_apply_patch_tool() -> anyhow::Result<()> {`
			`use core_test_support::load_sse_fixture_with_id_from_str;`
			`use tempfile::TempDir;`
			`use wiremock::Mock;`
			`use wiremock::MockServer;`
			`use wiremock::ResponseTemplate;`
			`use wiremock::matchers::method;`
			`use wiremock::matchers::path;`

			`const SSE_TOOL_CALL_ADD: &str = r#"[`
			`{`
			`"type": "response.output_item.done",`
			`"item": {`
			`"type": "function_call",`
			`"name": "apply_patch",`
			`"arguments": "{\n \"input\": \"* Begin Patch\\n* Add File: test.md\\n+Hello world\\n*** End Patch\"\n}",`
			`"call_id": "__ID__"`
			`}`
			`},`
			`{`
			`"type": "response.completed",`
			`"response": {`
			`"id": "__ID__",`
			`"usage": {`
			`"input_tokens": 0,`
			`"input_tokens_details": null,`
			`"output_tokens": 0,`
			`"output_tokens_details": null,`
			`"total_tokens": 0`
			`},`
			`"output": []`
			`}`
			`}`
			`]"#;`

			`const SSE_TOOL_CALL_UPDATE: &str = r#"[`
			`{`
			`"type": "response.output_item.done",`
			`"item": {`
			`"type": "function_call",`
			`"name": "apply_patch",`
			`"arguments": "{\n \"input\": \"* Begin Patch\\n* Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",`
			`"call_id": "__ID__"`
			`}`
			`},`
			`{`
			`"type": "response.completed",`
			`"response": {`
			`"id": "__ID__",`
			`"usage": {`
			`"input_tokens": 0,`
			`"input_tokens_details": null,`
			`"output_tokens": 0,`
			`"output_tokens_details": null,`
			`"total_tokens": 0`
			`},`
			`"output": []`
			`}`
			`}`
			`]"#;`

			`const SSE_TOOL_CALL_COMPLETED: &str = r#"[`
			`{`
			`"type": "response.completed",`
			`"response": {`
			`"id": "__ID__",`
			`"usage": {`
			`"input_tokens": 0,`
			`"input_tokens_details": null,`
			`"output_tokens": 0,`
			`"output_tokens_details": null,`
			`"total_tokens": 0`
			`},`
			`"output": []`
			`}`
			`}`
			`]"#;`

			`// Start a mock model server`
			`let server = MockServer::start().await;`

			`// First response: model calls apply_patch to create test.md`
			`let first = ResponseTemplate::new(200)`
			`.insert_header("content-type", "text/event-stream")`
			`.set_body_raw(`
			`load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),`
			`"text/event-stream",`
			`);`

			`Mock::given(method("POST"))`
			`// .and(path("/v1/responses"))`
			`.respond_with(first)`
			`.up_to_n_times(1)`
			`.mount(&server)`
			`.await;`

			`// Second response: model calls apply_patch to update test.md`
			`let second = ResponseTemplate::new(200)`
			`.insert_header("content-type", "text/event-stream")`
			`.set_body_raw(`
			`load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),`
			`"text/event-stream",`
			`);`

			`Mock::given(method("POST"))`
			`.and(path("/v1/responses"))`
			`.respond_with(second)`
			`.up_to_n_times(1)`
			`.mount(&server)`
			`.await;`

			`let final_completed = ResponseTemplate::new(200)`
			`.insert_header("content-type", "text/event-stream")`
			`.set_body_raw(`
			`load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),`
			`"text/event-stream",`
			`);`

			`Mock::given(method("POST"))`
			`// .and(path("/v1/responses"))`
			`.respond_with(final_completed)`
			`.expect(1)`
			`.mount(&server)`
			`.await;`

			`let tmp_cwd = TempDir::new().unwrap();`
			`Command::cargo_bin("codex-exec")`
			`.context("should find binary for codex-exec")?`
			`.current_dir(tmp_cwd.path())`
			`.env("CODEX_HOME", tmp_cwd.path())`
			`.env("OPENAI_API_KEY", "dummy")`
			`.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))`
			`.arg("--skip-git-repo-check")`
			`.arg("-s")`
			`.arg("workspace-write")`
			`.arg("foo")`
			`.assert()`
			`.success();`

			`// Verify final file contents`
			`let final_path = tmp_cwd.path().join("test.md");`
			`let contents = std::fs::read_to_string(&final_path)`
			`.unwrap_or_else(\|e\| panic!("failed reading {}: {e}", final_path.display()));`
			`assert_eq!(contents, "Final text\n");`
			`Ok(())`
			`}`