2025-08-15 11:55:53 -04:00
|
|
|
#![allow(clippy::expect_used, clippy::unwrap_used)]
|
|
|
|
|
|
fix: support special --codex-run-as-apply-patch arg (#1702)
This introduces some special behavior to the CLIs that are using the
`codex-arg0` crate where if `arg1` is `--codex-run-as-apply-patch`, then
it will run as if `apply_patch arg2` were invoked. This is important
because it means we can do things like:
```
SANDBOX_TYPE=landlock # or seatbelt for macOS
codex debug "${SANDBOX_TYPE}" -- codex --codex-run-as-apply-patch PATCH
```
which gives us a way to run `apply_patch` while ensuring it adheres to
the sandbox the user specified.
While it would be nice to use the `arg0` trick like we are currently
doing for `codex-linux-sandbox`, there is no way to specify the `arg0`
for the underlying command when running under `/usr/bin/sandbox-exec`,
so it will not work for us in this case.
Admittedly, we could have also supported this via a custom environment
variable (e.g., `CODEX_ARG0`), but since environment variables are
inherited by child processes, that seemed like a potentially leakier
abstraction.
This change, as well as our existing reliance on checking `arg0`, place
additional requirements on those who include `codex-core`. Its
`README.md` has been updated to reflect this.
While we could have just added an `apply-patch` subcommand to the
`codex` multitool CLI, that would not be sufficient for the standalone
`codex-exec` CLI, which is something that we distribute as part of our
GitHub releases for those who know they will not be using the TUI and
therefore prefer to use a slightly smaller executable:
https://github.com/openai/codex/releases/tag/rust-v0.10.0
To that end, this PR adds an integration test to ensure that the
`--codex-run-as-apply-patch` option works with the standalone
`codex-exec` CLI.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1702).
* #1705
* #1703
* __->__ #1702
* #1698
* #1697
2025-07-28 09:26:44 -07:00
|
|
|
use anyhow::Context;
|
|
|
|
|
use assert_cmd::prelude::*;
|
fix: run apply_patch calls through the sandbox (#1705)
Building on the work of https://github.com/openai/codex/pull/1702, this
changes how a shell call to `apply_patch` is handled.
Previously, a shell call to `apply_patch` was always handled in-process,
never leveraging a sandbox. To determine whether the `apply_patch`
operation could be auto-approved, the
`is_write_patch_constrained_to_writable_paths()` function would check if
all the paths listed in the paths were writable. If so, the agent would
apply the changes listed in the patch.
Unfortunately, this approach afforded a loophole: symlinks!
* For a soft link, we could fix this issue by tracing the link and
checking whether the target is in the set of writable paths, however...
* ...For a hard link, things are not as simple. We can run `stat FILE`
to see if the number of links is greater than 1, but then we would have
to do something potentially expensive like `find . -inum <inode_number>`
to find the other paths for `FILE`. Further, even if this worked, this
approach runs the risk of a
[TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use)
race condition, so it is not robust.
The solution, implemented in this PR, is to take the virtual execution
of the `apply_patch` CLI into an _actual_ execution using `codex
--codex-run-as-apply-patch PATCH`, which we can run under the sandbox
the user specified, just like any other `shell` call.
This, of course, assumes that the sandbox prevents writing through
symlinks as a mechanism to write to folders that are not in the writable
set configured by the sandbox. I verified this by testing the following
on both Mac and Linux:
```shell
#!/usr/bin/env bash
set -euo pipefail
# Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR?
# Codex is run in SANDBOX_DIR, so writes should be constrianed to this directory.
SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
# EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it.
EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
echo "SANDBOX_DIR: $SANDBOX_DIR"
echo "EXPLOIT_DIR: $EXPLOIT_DIR"
cleanup() {
# Only remove if it looks sane and still exists
[[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR"
[[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR"
}
trap cleanup EXIT
echo "I am the original content" > "${EXPLOIT_DIR}/original.txt"
# Drop the -s to test hard links.
ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt"
cat "${SANDBOX_DIR}/link-to-original.txt"
if [[ "$(uname)" == "Linux" ]]; then
SANDBOX_SUBCOMMAND=landlock
else
SANDBOX_SUBCOMMAND=seatbelt
fi
# Attempt the exploit
cd "${SANDBOX_DIR}"
codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true
cat "${EXPLOIT_DIR}/original.txt"
```
Admittedly, this change merits a proper integration test, but I think I
will have to do that in a follow-up PR.
2025-07-30 16:45:08 -07:00
|
|
|
use codex_core::CODEX_APPLY_PATCH_ARG1;
|
fix: support special --codex-run-as-apply-patch arg (#1702)
This introduces some special behavior to the CLIs that are using the
`codex-arg0` crate where if `arg1` is `--codex-run-as-apply-patch`, then
it will run as if `apply_patch arg2` were invoked. This is important
because it means we can do things like:
```
SANDBOX_TYPE=landlock # or seatbelt for macOS
codex debug "${SANDBOX_TYPE}" -- codex --codex-run-as-apply-patch PATCH
```
which gives us a way to run `apply_patch` while ensuring it adheres to
the sandbox the user specified.
While it would be nice to use the `arg0` trick like we are currently
doing for `codex-linux-sandbox`, there is no way to specify the `arg0`
for the underlying command when running under `/usr/bin/sandbox-exec`,
so it will not work for us in this case.
Admittedly, we could have also supported this via a custom environment
variable (e.g., `CODEX_ARG0`), but since environment variables are
inherited by child processes, that seemed like a potentially leakier
abstraction.
This change, as well as our existing reliance on checking `arg0`, place
additional requirements on those who include `codex-core`. Its
`README.md` has been updated to reflect this.
While we could have just added an `apply-patch` subcommand to the
`codex` multitool CLI, that would not be sufficient for the standalone
`codex-exec` CLI, which is something that we distribute as part of our
GitHub releases for those who know they will not be using the TUI and
therefore prefer to use a slightly smaller executable:
https://github.com/openai/codex/releases/tag/rust-v0.10.0
To that end, this PR adds an integration test to ensure that the
`--codex-run-as-apply-patch` option works with the standalone
`codex-exec` CLI.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1702).
* #1705
* #1703
* __->__ #1702
* #1698
* #1697
2025-07-28 09:26:44 -07:00
|
|
|
use std::fs;
|
|
|
|
|
use std::process::Command;
|
|
|
|
|
use tempfile::tempdir;
|
|
|
|
|
|
|
|
|
|
/// While we may add an `apply-patch` subcommand to the `codex` CLI multitool
|
|
|
|
|
/// at some point, we must ensure that the smaller `codex-exec` CLI can still
|
|
|
|
|
/// emulate the `apply_patch` CLI.
|
|
|
|
|
#[test]
|
|
|
|
|
fn test_standalone_exec_cli_can_use_apply_patch() -> anyhow::Result<()> {
|
|
|
|
|
let tmp = tempdir()?;
|
|
|
|
|
let relative_path = "source.txt";
|
|
|
|
|
let absolute_path = tmp.path().join(relative_path);
|
|
|
|
|
fs::write(&absolute_path, "original content\n")?;
|
|
|
|
|
|
|
|
|
|
Command::cargo_bin("codex-exec")
|
|
|
|
|
.context("should find binary for codex-exec")?
|
fix: run apply_patch calls through the sandbox (#1705)
Building on the work of https://github.com/openai/codex/pull/1702, this
changes how a shell call to `apply_patch` is handled.
Previously, a shell call to `apply_patch` was always handled in-process,
never leveraging a sandbox. To determine whether the `apply_patch`
operation could be auto-approved, the
`is_write_patch_constrained_to_writable_paths()` function would check if
all the paths listed in the paths were writable. If so, the agent would
apply the changes listed in the patch.
Unfortunately, this approach afforded a loophole: symlinks!
* For a soft link, we could fix this issue by tracing the link and
checking whether the target is in the set of writable paths, however...
* ...For a hard link, things are not as simple. We can run `stat FILE`
to see if the number of links is greater than 1, but then we would have
to do something potentially expensive like `find . -inum <inode_number>`
to find the other paths for `FILE`. Further, even if this worked, this
approach runs the risk of a
[TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use)
race condition, so it is not robust.
The solution, implemented in this PR, is to take the virtual execution
of the `apply_patch` CLI into an _actual_ execution using `codex
--codex-run-as-apply-patch PATCH`, which we can run under the sandbox
the user specified, just like any other `shell` call.
This, of course, assumes that the sandbox prevents writing through
symlinks as a mechanism to write to folders that are not in the writable
set configured by the sandbox. I verified this by testing the following
on both Mac and Linux:
```shell
#!/usr/bin/env bash
set -euo pipefail
# Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR?
# Codex is run in SANDBOX_DIR, so writes should be constrianed to this directory.
SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
# EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it.
EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
echo "SANDBOX_DIR: $SANDBOX_DIR"
echo "EXPLOIT_DIR: $EXPLOIT_DIR"
cleanup() {
# Only remove if it looks sane and still exists
[[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR"
[[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR"
}
trap cleanup EXIT
echo "I am the original content" > "${EXPLOIT_DIR}/original.txt"
# Drop the -s to test hard links.
ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt"
cat "${SANDBOX_DIR}/link-to-original.txt"
if [[ "$(uname)" == "Linux" ]]; then
SANDBOX_SUBCOMMAND=landlock
else
SANDBOX_SUBCOMMAND=seatbelt
fi
# Attempt the exploit
cd "${SANDBOX_DIR}"
codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true
cat "${EXPLOIT_DIR}/original.txt"
```
Admittedly, this change merits a proper integration test, but I think I
will have to do that in a follow-up PR.
2025-07-30 16:45:08 -07:00
|
|
|
.arg(CODEX_APPLY_PATCH_ARG1)
|
fix: support special --codex-run-as-apply-patch arg (#1702)
This introduces some special behavior to the CLIs that are using the
`codex-arg0` crate where if `arg1` is `--codex-run-as-apply-patch`, then
it will run as if `apply_patch arg2` were invoked. This is important
because it means we can do things like:
```
SANDBOX_TYPE=landlock # or seatbelt for macOS
codex debug "${SANDBOX_TYPE}" -- codex --codex-run-as-apply-patch PATCH
```
which gives us a way to run `apply_patch` while ensuring it adheres to
the sandbox the user specified.
While it would be nice to use the `arg0` trick like we are currently
doing for `codex-linux-sandbox`, there is no way to specify the `arg0`
for the underlying command when running under `/usr/bin/sandbox-exec`,
so it will not work for us in this case.
Admittedly, we could have also supported this via a custom environment
variable (e.g., `CODEX_ARG0`), but since environment variables are
inherited by child processes, that seemed like a potentially leakier
abstraction.
This change, as well as our existing reliance on checking `arg0`, place
additional requirements on those who include `codex-core`. Its
`README.md` has been updated to reflect this.
While we could have just added an `apply-patch` subcommand to the
`codex` multitool CLI, that would not be sufficient for the standalone
`codex-exec` CLI, which is something that we distribute as part of our
GitHub releases for those who know they will not be using the TUI and
therefore prefer to use a slightly smaller executable:
https://github.com/openai/codex/releases/tag/rust-v0.10.0
To that end, this PR adds an integration test to ensure that the
`--codex-run-as-apply-patch` option works with the standalone
`codex-exec` CLI.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1702).
* #1705
* #1703
* __->__ #1702
* #1698
* #1697
2025-07-28 09:26:44 -07:00
|
|
|
.arg(
|
|
|
|
|
r#"*** Begin Patch
|
|
|
|
|
*** Update File: source.txt
|
|
|
|
|
@@
|
|
|
|
|
-original content
|
|
|
|
|
+modified by apply_patch
|
|
|
|
|
*** End Patch"#,
|
|
|
|
|
)
|
|
|
|
|
.current_dir(tmp.path())
|
|
|
|
|
.assert()
|
|
|
|
|
.success()
|
|
|
|
|
.stdout("Success. Updated the following files:\nM source.txt\n")
|
|
|
|
|
.stderr(predicates::str::is_empty());
|
|
|
|
|
assert_eq!(
|
|
|
|
|
fs::read_to_string(absolute_path)?,
|
|
|
|
|
"modified by apply_patch\n"
|
|
|
|
|
);
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|
2025-08-15 11:55:53 -04:00
|
|
|
|
|
|
|
|
#[cfg(not(target_os = "windows"))]
|
|
|
|
|
#[tokio::test]
|
|
|
|
|
async fn test_apply_patch_tool() -> anyhow::Result<()> {
|
|
|
|
|
use core_test_support::load_sse_fixture_with_id_from_str;
|
|
|
|
|
use tempfile::TempDir;
|
|
|
|
|
use wiremock::Mock;
|
|
|
|
|
use wiremock::MockServer;
|
|
|
|
|
use wiremock::ResponseTemplate;
|
|
|
|
|
use wiremock::matchers::method;
|
|
|
|
|
use wiremock::matchers::path;
|
|
|
|
|
|
|
|
|
|
const SSE_TOOL_CALL_ADD: &str = r#"[
|
|
|
|
|
{
|
|
|
|
|
"type": "response.output_item.done",
|
|
|
|
|
"item": {
|
|
|
|
|
"type": "function_call",
|
|
|
|
|
"name": "apply_patch",
|
|
|
|
|
"arguments": "{\n \"input\": \"*** Begin Patch\\n*** Add File: test.md\\n+Hello world\\n*** End Patch\"\n}",
|
|
|
|
|
"call_id": "__ID__"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"type": "response.completed",
|
|
|
|
|
"response": {
|
|
|
|
|
"id": "__ID__",
|
|
|
|
|
"usage": {
|
|
|
|
|
"input_tokens": 0,
|
|
|
|
|
"input_tokens_details": null,
|
|
|
|
|
"output_tokens": 0,
|
|
|
|
|
"output_tokens_details": null,
|
|
|
|
|
"total_tokens": 0
|
|
|
|
|
},
|
|
|
|
|
"output": []
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
]"#;
|
|
|
|
|
|
|
|
|
|
const SSE_TOOL_CALL_UPDATE: &str = r#"[
|
|
|
|
|
{
|
|
|
|
|
"type": "response.output_item.done",
|
|
|
|
|
"item": {
|
|
|
|
|
"type": "function_call",
|
|
|
|
|
"name": "apply_patch",
|
|
|
|
|
"arguments": "{\n \"input\": \"*** Begin Patch\\n*** Update File: test.md\\n@@\\n-Hello world\\n+Final text\\n*** End Patch\"\n}",
|
|
|
|
|
"call_id": "__ID__"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"type": "response.completed",
|
|
|
|
|
"response": {
|
|
|
|
|
"id": "__ID__",
|
|
|
|
|
"usage": {
|
|
|
|
|
"input_tokens": 0,
|
|
|
|
|
"input_tokens_details": null,
|
|
|
|
|
"output_tokens": 0,
|
|
|
|
|
"output_tokens_details": null,
|
|
|
|
|
"total_tokens": 0
|
|
|
|
|
},
|
|
|
|
|
"output": []
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
]"#;
|
|
|
|
|
|
|
|
|
|
const SSE_TOOL_CALL_COMPLETED: &str = r#"[
|
|
|
|
|
{
|
|
|
|
|
"type": "response.completed",
|
|
|
|
|
"response": {
|
|
|
|
|
"id": "__ID__",
|
|
|
|
|
"usage": {
|
|
|
|
|
"input_tokens": 0,
|
|
|
|
|
"input_tokens_details": null,
|
|
|
|
|
"output_tokens": 0,
|
|
|
|
|
"output_tokens_details": null,
|
|
|
|
|
"total_tokens": 0
|
|
|
|
|
},
|
|
|
|
|
"output": []
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
]"#;
|
|
|
|
|
|
|
|
|
|
// Start a mock model server
|
|
|
|
|
let server = MockServer::start().await;
|
|
|
|
|
|
|
|
|
|
// First response: model calls apply_patch to create test.md
|
|
|
|
|
let first = ResponseTemplate::new(200)
|
|
|
|
|
.insert_header("content-type", "text/event-stream")
|
|
|
|
|
.set_body_raw(
|
|
|
|
|
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_ADD, "call1"),
|
|
|
|
|
"text/event-stream",
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Mock::given(method("POST"))
|
|
|
|
|
// .and(path("/v1/responses"))
|
|
|
|
|
.respond_with(first)
|
|
|
|
|
.up_to_n_times(1)
|
|
|
|
|
.mount(&server)
|
|
|
|
|
.await;
|
|
|
|
|
|
|
|
|
|
// Second response: model calls apply_patch to update test.md
|
|
|
|
|
let second = ResponseTemplate::new(200)
|
|
|
|
|
.insert_header("content-type", "text/event-stream")
|
|
|
|
|
.set_body_raw(
|
|
|
|
|
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_UPDATE, "call2"),
|
|
|
|
|
"text/event-stream",
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Mock::given(method("POST"))
|
|
|
|
|
.and(path("/v1/responses"))
|
|
|
|
|
.respond_with(second)
|
|
|
|
|
.up_to_n_times(1)
|
|
|
|
|
.mount(&server)
|
|
|
|
|
.await;
|
|
|
|
|
|
|
|
|
|
let final_completed = ResponseTemplate::new(200)
|
|
|
|
|
.insert_header("content-type", "text/event-stream")
|
|
|
|
|
.set_body_raw(
|
|
|
|
|
load_sse_fixture_with_id_from_str(SSE_TOOL_CALL_COMPLETED, "resp3"),
|
|
|
|
|
"text/event-stream",
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
Mock::given(method("POST"))
|
|
|
|
|
// .and(path("/v1/responses"))
|
|
|
|
|
.respond_with(final_completed)
|
|
|
|
|
.expect(1)
|
|
|
|
|
.mount(&server)
|
|
|
|
|
.await;
|
|
|
|
|
|
|
|
|
|
let tmp_cwd = TempDir::new().unwrap();
|
|
|
|
|
Command::cargo_bin("codex-exec")
|
|
|
|
|
.context("should find binary for codex-exec")?
|
|
|
|
|
.current_dir(tmp_cwd.path())
|
|
|
|
|
.env("CODEX_HOME", tmp_cwd.path())
|
|
|
|
|
.env("OPENAI_API_KEY", "dummy")
|
|
|
|
|
.env("OPENAI_BASE_URL", format!("{}/v1", server.uri()))
|
|
|
|
|
.arg("--skip-git-repo-check")
|
|
|
|
|
.arg("-s")
|
|
|
|
|
.arg("workspace-write")
|
|
|
|
|
.arg("foo")
|
|
|
|
|
.assert()
|
|
|
|
|
.success();
|
|
|
|
|
|
|
|
|
|
// Verify final file contents
|
|
|
|
|
let final_path = tmp_cwd.path().join("test.md");
|
|
|
|
|
let contents = std::fs::read_to_string(&final_path)
|
|
|
|
|
.unwrap_or_else(|e| panic!("failed reading {}: {e}", final_path.display()));
|
|
|
|
|
assert_eq!(contents, "Final text\n");
|
|
|
|
|
Ok(())
|
|
|
|
|
}
|