codex-rs/arg0/src/lib.rs

use std::future::Future;
use std::path::Path;
use std::path::PathBuf;

use codex_core::CODEX_APPLY_PATCH_ARG1;
#[cfg(unix)]
use std::os::unix::fs::symlink;
use tempfile::TempDir;

const LINUX_SANDBOX_ARG0: &str = "codex-linux-sandbox";
const APPLY_PATCH_ARG0: &str = "apply_patch";
const MISSPELLED_APPLY_PATCH_ARG0: &str = "applypatch";

/// While we want to deploy the Codex CLI as a single executable for simplicity,
/// we also want to expose some of its functionality as distinct CLIs, so we use
/// the "arg0 trick" to determine which CLI to dispatch. This effectively allows
/// us to simulate deploying multiple executables as a single binary on Mac and
/// Linux (but not Windows).
///
/// When the current executable is invoked through the hard-link or alias named
/// `codex-linux-sandbox` we *directly* execute
/// [`codex_linux_sandbox::run_main`] (which never returns). Otherwise we:
///
/// 1.  Use [`dotenvy::from_path`] and [`dotenvy::dotenv`] to modify the
///     environment before creating any threads.
/// 2.  Construct a Tokio multi-thread runtime.
/// 3.  Derive the path to the current executable (so children can re-invoke the
///     sandbox) when running on Linux.
/// 4.  Execute the provided async `main_fn` inside that runtime, forwarding any
///     error. Note that `main_fn` receives `codex_linux_sandbox_exe:
///     Option<PathBuf>`, as an argument, which is generally needed as part of
///     constructing [`codex_core::config::Config`].
///
/// This function should be used to wrap any `main()` function in binary crates
/// in this workspace that depends on these helper CLIs.
pub fn arg0_dispatch_or_else<F, Fut>(main_fn: F) -> anyhow::Result<()>
where
    F: FnOnce(Option<PathBuf>) -> Fut,
    Fut: Future<Output = anyhow::Result<()>>,
{
    // Determine if we were invoked via the special alias.
    let mut args = std::env::args_os();
    let argv0 = args.next().unwrap_or_default();
    let exe_name = Path::new(&argv0)
        .file_name()
        .and_then(|s| s.to_str())
        .unwrap_or("");

    if exe_name == LINUX_SANDBOX_ARG0 {
        // Safety: [`run_main`] never returns.
        codex_linux_sandbox::run_main();
    } else if exe_name == APPLY_PATCH_ARG0 || exe_name == MISSPELLED_APPLY_PATCH_ARG0 {
        codex_apply_patch::main();
    }

    let argv1 = args.next().unwrap_or_default();
    if argv1 == CODEX_APPLY_PATCH_ARG1 {
        let patch_arg = args.next().and_then(|s| s.to_str().map(|s| s.to_owned()));
        let exit_code = match patch_arg {
            Some(patch_arg) => {
                let mut stdout = std::io::stdout();
                let mut stderr = std::io::stderr();
                match codex_apply_patch::apply_patch(&patch_arg, &mut stdout, &mut stderr) {
                    Ok(()) => 0,
                    Err(_) => 1,
                }
            }
            None => {
                eprintln!("Error: {CODEX_APPLY_PATCH_ARG1} requires a UTF-8 PATCH argument.");
                1
            }
        };
        std::process::exit(exit_code);
    }

    // This modifies the environment, which is not thread-safe, so do this
    // before creating any threads/the Tokio runtime.
    load_dotenv();

    // Retain the TempDir so it exists for the lifetime of the invocation of
    // this executable. Admittedly, we could invoke `keep()` on it, but it
    // would be nice to avoid leaving temporary directories behind, if possible.
    let _path_entry = match prepend_path_entry_for_apply_patch() {
        Ok(path_entry) => Some(path_entry),
        Err(err) => {
            // It is possible that Codex will proceed successfully even if
            // updating the PATH fails, so warn the user and move on.
            eprintln!("WARNING: proceeding, even though we could not update PATH: {err}");
            None
        }
    };

    // Regular invocation – create a Tokio runtime and execute the provided
    // async entry-point.
    let runtime = tokio::runtime::Runtime::new()?;
    runtime.block_on(async move {
        let codex_linux_sandbox_exe: Option<PathBuf> = if cfg!(target_os = "linux") {
            std::env::current_exe().ok()
        } else {
            None
        };

        main_fn(codex_linux_sandbox_exe).await
    })
}

const ILLEGAL_ENV_VAR_PREFIX: &str = "CODEX_";

/// Load env vars from ~/.codex/.env and `$(pwd)/.env`.
///
/// Security: Do not allow `.env` files to create or modify any variables
/// with names starting with `CODEX_`.
fn load_dotenv() {
    if let Ok(codex_home) = codex_core::config::find_codex_home()
        && let Ok(iter) = dotenvy::from_path_iter(codex_home.join(".env"))
    {
        set_filtered(iter);
    }

    if let Ok(iter) = dotenvy::dotenv_iter() {
        set_filtered(iter);
    }
}

/// Helper to set vars from a dotenvy iterator while filtering out `CODEX_` keys.
fn set_filtered<I>(iter: I)
where
    I: IntoIterator<Item = Result<(String, String), dotenvy::Error>>,
{
    for (key, value) in iter.into_iter().flatten() {
        if !key.to_ascii_uppercase().starts_with(ILLEGAL_ENV_VAR_PREFIX) {
            // It is safe to call set_var() because our process is
            // single-threaded at this point in its execution.
            unsafe { std::env::set_var(&key, &value) };
        }
    }
}

/// Creates a temporary directory with either:
///
/// - UNIX: `apply_patch` symlink to the current executable
/// - WINDOWS: `apply_patch.bat` batch script to invoke the current executable
///   with the "secret" --codex-run-as-apply-patch flag.
///
/// This temporary directory is prepended to the PATH environment variable so
/// that `apply_patch` can be on the PATH without requiring the user to
/// install a separate `apply_patch` executable, simplifying the deployment of
/// Codex CLI.
///
/// IMPORTANT: This function modifies the PATH environment variable, so it MUST
/// be called before multiple threads are spawned.
fn prepend_path_entry_for_apply_patch() -> std::io::Result<TempDir> {
    let temp_dir = TempDir::new()?;
    let path = temp_dir.path();

    for filename in &[APPLY_PATCH_ARG0, MISSPELLED_APPLY_PATCH_ARG0] {
        let exe = std::env::current_exe()?;

        #[cfg(unix)]
        {
            let link = path.join(filename);
            symlink(&exe, &link)?;
        }

        #[cfg(windows)]
        {
            let batch_script = path.join(format!("{filename}.bat"));
            std::fs::write(
                &batch_script,
                format!(
                    r#"@echo off
"{}" {CODEX_APPLY_PATCH_ARG1} %*
"#,
                    exe.display()
                ),
            )?;
        }
    }

    #[cfg(unix)]
    const PATH_SEPARATOR: &str = ":";

    #[cfg(windows)]
    const PATH_SEPARATOR: &str = ";";

    let path_element = path.display();
    let updated_path_env_var = match std::env::var("PATH") {
        Ok(existing_path) => {
            format!("{path_element}{PATH_SEPARATOR}{existing_path}")
        }
        Err(_) => {
            format!("{path_element}")
        }
    };

    unsafe {
        std::env::set_var("PATH", updated_path_env_var);
    }

    Ok(temp_dir)
}
-												fix: move arg0 handling out of codex-linux-sandbox and into its own crate (#1697)


											
										
										
											2025-07-28 08:31:24 -07:00
+								use std::future::Future;
 								use std::path::Path;
 								use std::path::PathBuf;
-												fix: run apply_patch calls through the sandbox (#1705)

Building on the work of https://github.com/openai/codex/pull/1702, this
changes how a shell call to `apply_patch` is handled.

Previously, a shell call to `apply_patch` was always handled in-process,
never leveraging a sandbox. To determine whether the `apply_patch`
operation could be auto-approved, the
`is_write_patch_constrained_to_writable_paths()` function would check if
all the paths listed in the paths were writable. If so, the agent would
apply the changes listed in the patch.

Unfortunately, this approach afforded a loophole: symlinks!

* For a soft link, we could fix this issue by tracing the link and
checking whether the target is in the set of writable paths, however...
* ...For a hard link, things are not as simple. We can run `stat FILE`
to see if the number of links is greater than 1, but then we would have
to do something potentially expensive like `find . -inum <inode_number>`
to find the other paths for `FILE`. Further, even if this worked, this
approach runs the risk of a
[TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use)
race condition, so it is not robust.

The solution, implemented in this PR, is to take the virtual execution
of the `apply_patch` CLI into an _actual_ execution using `codex
--codex-run-as-apply-patch PATCH`, which we can run under the sandbox
the user specified, just like any other `shell` call.

This, of course, assumes that the sandbox prevents writing through
symlinks as a mechanism to write to folders that are not in the writable
set configured by the sandbox. I verified this by testing the following
on both Mac and Linux:

```shell
#!/usr/bin/env bash
set -euo pipefail

# Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR?

# Codex is run in SANDBOX_DIR, so writes should be constrianed to this directory.
SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
# EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it.
EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)

echo "SANDBOX_DIR: $SANDBOX_DIR"
echo "EXPLOIT_DIR: $EXPLOIT_DIR"

cleanup() {
  # Only remove if it looks sane and still exists
  [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR"
  [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR"
}

trap cleanup EXIT

echo "I am the original content" > "${EXPLOIT_DIR}/original.txt"

# Drop the -s to test hard links.
ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt"

cat "${SANDBOX_DIR}/link-to-original.txt"

if [[ "$(uname)" == "Linux" ]]; then
    SANDBOX_SUBCOMMAND=landlock
else
    SANDBOX_SUBCOMMAND=seatbelt
fi

# Attempt the exploit
cd "${SANDBOX_DIR}"

codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true

cat "${EXPLOIT_DIR}/original.txt"
```

Admittedly, this change merits a proper integration test, but I think I
will have to do that in a follow-up PR.
											
										
										
											2025-07-30 16:45:08 -07:00
+								use codex_core::CODEX_APPLY_PATCH_ARG1;
-												feat: use the arg0 trick with apply_patch (#2646)

Historically, Codex CLI has treated `apply_patch` (and its sometimes
misspelling, `applypatch`) as a "virtual CLI," intercepting it when it
appears as the first arg to `command` for the `"container.exec",
`"shell"`, or `"local_shell"` tools.

This approach has a known limitation where if, say, the model created a
Python script that runs `apply_patch` and then tried to run the Python
script, we have no insight as to what the model is trying to do and the
Python Script would fail because `apply_patch` was never really on the
`PATH`.

One way to solve this problem is to require users to install an
`apply_patch` executable alongside the `codex` executable (or at least
put it someplace where Codex can discover it). Though to keep Codex CLI
as a standalone executable, we exploit "the arg0 trick" where we create
a temporary directory with an entry named `apply_patch` and prepend that
directory to the `PATH` for the duration of the invocation of Codex.

- On UNIX, `apply_patch` is a symlink to `codex`, which now changes its
behavior to behave like `apply_patch` if arg0 is `apply_patch` (or
`applypatch`)
- On Windows, `apply_patch.bat` is a batch script that runs `codex
--codex-run-as-apply-patch %*`, as Codex also changes its behavior if
the first argument is `--codex-run-as-apply-patch`.
											
										
										
											2025-08-24 14:35:51 -07:00
+								#[cfg(unix)]
 								use std::os::unix::fs::symlink;
 								use tempfile::TempDir;
 								const LINUX_SANDBOX_ARG0: &str = "codex-linux-sandbox";
 								const APPLY_PATCH_ARG0: &str = "apply_patch";
 								const MISSPELLED_APPLY_PATCH_ARG0: &str = "applypatch";
-												fix: run apply_patch calls through the sandbox (#1705)

Building on the work of https://github.com/openai/codex/pull/1702, this
changes how a shell call to `apply_patch` is handled.

Previously, a shell call to `apply_patch` was always handled in-process,
never leveraging a sandbox. To determine whether the `apply_patch`
operation could be auto-approved, the
`is_write_patch_constrained_to_writable_paths()` function would check if
all the paths listed in the paths were writable. If so, the agent would
apply the changes listed in the patch.

Unfortunately, this approach afforded a loophole: symlinks!

* For a soft link, we could fix this issue by tracing the link and
checking whether the target is in the set of writable paths, however...
* ...For a hard link, things are not as simple. We can run `stat FILE`
to see if the number of links is greater than 1, but then we would have
to do something potentially expensive like `find . -inum <inode_number>`
to find the other paths for `FILE`. Further, even if this worked, this
approach runs the risk of a
[TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use)
race condition, so it is not robust.

The solution, implemented in this PR, is to take the virtual execution
of the `apply_patch` CLI into an _actual_ execution using `codex
--codex-run-as-apply-patch PATCH`, which we can run under the sandbox
the user specified, just like any other `shell` call.

This, of course, assumes that the sandbox prevents writing through
symlinks as a mechanism to write to folders that are not in the writable
set configured by the sandbox. I verified this by testing the following
on both Mac and Linux:

```shell
#!/usr/bin/env bash
set -euo pipefail

# Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR?

# Codex is run in SANDBOX_DIR, so writes should be constrianed to this directory.
SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
# EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it.
EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)

echo "SANDBOX_DIR: $SANDBOX_DIR"
echo "EXPLOIT_DIR: $EXPLOIT_DIR"

cleanup() {
  # Only remove if it looks sane and still exists
  [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR"
  [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR"
}

trap cleanup EXIT

echo "I am the original content" > "${EXPLOIT_DIR}/original.txt"

# Drop the -s to test hard links.
ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt"

cat "${SANDBOX_DIR}/link-to-original.txt"

if [[ "$(uname)" == "Linux" ]]; then
    SANDBOX_SUBCOMMAND=landlock
else
    SANDBOX_SUBCOMMAND=seatbelt
fi

# Attempt the exploit
cd "${SANDBOX_DIR}"

codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true

cat "${EXPLOIT_DIR}/original.txt"
```

Admittedly, this change merits a proper integration test, but I think I
will have to do that in a follow-up PR.
											
										
										
											2025-07-30 16:45:08 -07:00
-												fix: move arg0 handling out of codex-linux-sandbox and into its own crate (#1697)


											
										
										
											2025-07-28 08:31:24 -07:00
+								/// While we want to deploy the Codex CLI as a single executable for simplicity,
 								/// we also want to expose some of its functionality as distinct CLIs, so we use
 								/// the "arg0 trick" to determine which CLI to dispatch. This effectively allows
 								/// us to simulate deploying multiple executables as a single binary on Mac and
 								/// Linux (but not Windows).
 								///
 								/// When the current executable is invoked through the hard-link or alias named
 								/// `codex-linux-sandbox` we *directly* execute
 								/// [`codex_linux_sandbox::run_main`] (which never returns). Otherwise we:
 								///
 								/// 1.  Use [`dotenvy::from_path`] and [`dotenvy::dotenv`] to modify the
 								///     environment before creating any threads.
 								/// 2.  Construct a Tokio multi-thread runtime.
 								/// 3.  Derive the path to the current executable (so children can re-invoke the
 								///     sandbox) when running on Linux.
 								/// 4.  Execute the provided async `main_fn` inside that runtime, forwarding any
 								///     error. Note that `main_fn` receives `codex_linux_sandbox_exe:
 								///     Option<PathBuf>`, as an argument, which is generally needed as part of
 								///     constructing [`codex_core::config::Config`].
 								///
 								/// This function should be used to wrap any `main()` function in binary crates
 								/// in this workspace that depends on these helper CLIs.
 								pub fn arg0_dispatch_or_else<F, Fut>(main_fn: F) -> anyhow::Result<()>
 								where
 								    F: FnOnce(Option<PathBuf>) -> Fut,
 								    Fut: Future<Output = anyhow::Result<()>>,
 								{
 								    // Determine if we were invoked via the special alias.
-												fix: support special --codex-run-as-apply-patch arg (#1702)

This introduces some special behavior to the CLIs that are using the
`codex-arg0` crate where if `arg1` is `--codex-run-as-apply-patch`, then
it will run as if `apply_patch arg2` were invoked. This is important
because it means we can do things like:

```
SANDBOX_TYPE=landlock # or seatbelt for macOS
codex debug "${SANDBOX_TYPE}" -- codex --codex-run-as-apply-patch PATCH
```

which gives us a way to run `apply_patch` while ensuring it adheres to
the sandbox the user specified.

While it would be nice to use the `arg0` trick like we are currently
doing for `codex-linux-sandbox`, there is no way to specify the `arg0`
for the underlying command when running under `/usr/bin/sandbox-exec`,
so it will not work for us in this case.

Admittedly, we could have also supported this via a custom environment
variable (e.g., `CODEX_ARG0`), but since environment variables are
inherited by child processes, that seemed like a potentially leakier
abstraction.

This change, as well as our existing reliance on checking `arg0`, place
additional requirements on those who include `codex-core`. Its
`README.md` has been updated to reflect this.

While we could have just added an `apply-patch` subcommand to the
`codex` multitool CLI, that would not be sufficient for the standalone
`codex-exec` CLI, which is something that we distribute as part of our
GitHub releases for those who know they will not be using the TUI and
therefore prefer to use a slightly smaller executable:

https://github.com/openai/codex/releases/tag/rust-v0.10.0

To that end, this PR adds an integration test to ensure that the
`--codex-run-as-apply-patch` option works with the standalone
`codex-exec` CLI.

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1702).
* #1705
* #1703
* __->__ #1702
* #1698
* #1697
											
										
										
											2025-07-28 09:26:44 -07:00
+								    let mut args = std::env::args_os();
 								    let argv0 = args.next().unwrap_or_default();
-												fix: move arg0 handling out of codex-linux-sandbox and into its own crate (#1697)


											
										
										
											2025-07-28 08:31:24 -07:00
+								    let exe_name = Path::new(&argv0)
 								        .file_name()
 								        .and_then(|s| s.to_str())
 								        .unwrap_or("");
-												feat: use the arg0 trick with apply_patch (#2646)

Historically, Codex CLI has treated `apply_patch` (and its sometimes
misspelling, `applypatch`) as a "virtual CLI," intercepting it when it
appears as the first arg to `command` for the `"container.exec",
`"shell"`, or `"local_shell"` tools.

This approach has a known limitation where if, say, the model created a
Python script that runs `apply_patch` and then tried to run the Python
script, we have no insight as to what the model is trying to do and the
Python Script would fail because `apply_patch` was never really on the
`PATH`.

One way to solve this problem is to require users to install an
`apply_patch` executable alongside the `codex` executable (or at least
put it someplace where Codex can discover it). Though to keep Codex CLI
as a standalone executable, we exploit "the arg0 trick" where we create
a temporary directory with an entry named `apply_patch` and prepend that
directory to the `PATH` for the duration of the invocation of Codex.

- On UNIX, `apply_patch` is a symlink to `codex`, which now changes its
behavior to behave like `apply_patch` if arg0 is `apply_patch` (or
`applypatch`)
- On Windows, `apply_patch.bat` is a batch script that runs `codex
--codex-run-as-apply-patch %*`, as Codex also changes its behavior if
the first argument is `--codex-run-as-apply-patch`.
											
										
										
											2025-08-24 14:35:51 -07:00
+								    if exe_name == LINUX_SANDBOX_ARG0 {
-												fix: move arg0 handling out of codex-linux-sandbox and into its own crate (#1697)


											
										
										
											2025-07-28 08:31:24 -07:00
+								        // Safety: [`run_main`] never returns.
 								        codex_linux_sandbox::run_main();
-												feat: use the arg0 trick with apply_patch (#2646)

Historically, Codex CLI has treated `apply_patch` (and its sometimes
misspelling, `applypatch`) as a "virtual CLI," intercepting it when it
appears as the first arg to `command` for the `"container.exec",
`"shell"`, or `"local_shell"` tools.

This approach has a known limitation where if, say, the model created a
Python script that runs `apply_patch` and then tried to run the Python
script, we have no insight as to what the model is trying to do and the
Python Script would fail because `apply_patch` was never really on the
`PATH`.

One way to solve this problem is to require users to install an
`apply_patch` executable alongside the `codex` executable (or at least
put it someplace where Codex can discover it). Though to keep Codex CLI
as a standalone executable, we exploit "the arg0 trick" where we create
a temporary directory with an entry named `apply_patch` and prepend that
directory to the `PATH` for the duration of the invocation of Codex.

- On UNIX, `apply_patch` is a symlink to `codex`, which now changes its
behavior to behave like `apply_patch` if arg0 is `apply_patch` (or
`applypatch`)
- On Windows, `apply_patch.bat` is a batch script that runs `codex
--codex-run-as-apply-patch %*`, as Codex also changes its behavior if
the first argument is `--codex-run-as-apply-patch`.
											
										
										
											2025-08-24 14:35:51 -07:00
+								    } else if exe_name == APPLY_PATCH_ARG0 || exe_name == MISSPELLED_APPLY_PATCH_ARG0 {
 								        codex_apply_patch::main();
-												fix: move arg0 handling out of codex-linux-sandbox and into its own crate (#1697)


											
										
										
											2025-07-28 08:31:24 -07:00
+								    }
-												fix: support special --codex-run-as-apply-patch arg (#1702)

This introduces some special behavior to the CLIs that are using the
`codex-arg0` crate where if `arg1` is `--codex-run-as-apply-patch`, then
it will run as if `apply_patch arg2` were invoked. This is important
because it means we can do things like:

```
SANDBOX_TYPE=landlock # or seatbelt for macOS
codex debug "${SANDBOX_TYPE}" -- codex --codex-run-as-apply-patch PATCH
```

which gives us a way to run `apply_patch` while ensuring it adheres to
the sandbox the user specified.

While it would be nice to use the `arg0` trick like we are currently
doing for `codex-linux-sandbox`, there is no way to specify the `arg0`
for the underlying command when running under `/usr/bin/sandbox-exec`,
so it will not work for us in this case.

Admittedly, we could have also supported this via a custom environment
variable (e.g., `CODEX_ARG0`), but since environment variables are
inherited by child processes, that seemed like a potentially leakier
abstraction.

This change, as well as our existing reliance on checking `arg0`, place
additional requirements on those who include `codex-core`. Its
`README.md` has been updated to reflect this.

While we could have just added an `apply-patch` subcommand to the
`codex` multitool CLI, that would not be sufficient for the standalone
`codex-exec` CLI, which is something that we distribute as part of our
GitHub releases for those who know they will not be using the TUI and
therefore prefer to use a slightly smaller executable:

https://github.com/openai/codex/releases/tag/rust-v0.10.0

To that end, this PR adds an integration test to ensure that the
`--codex-run-as-apply-patch` option works with the standalone
`codex-exec` CLI.

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1702).
* #1705
* #1703
* __->__ #1702
* #1698
* #1697
											
										
										
											2025-07-28 09:26:44 -07:00
+								    let argv1 = args.next().unwrap_or_default();
-												fix: run apply_patch calls through the sandbox (#1705)

Building on the work of https://github.com/openai/codex/pull/1702, this
changes how a shell call to `apply_patch` is handled.

Previously, a shell call to `apply_patch` was always handled in-process,
never leveraging a sandbox. To determine whether the `apply_patch`
operation could be auto-approved, the
`is_write_patch_constrained_to_writable_paths()` function would check if
all the paths listed in the paths were writable. If so, the agent would
apply the changes listed in the patch.

Unfortunately, this approach afforded a loophole: symlinks!

* For a soft link, we could fix this issue by tracing the link and
checking whether the target is in the set of writable paths, however...
* ...For a hard link, things are not as simple. We can run `stat FILE`
to see if the number of links is greater than 1, but then we would have
to do something potentially expensive like `find . -inum <inode_number>`
to find the other paths for `FILE`. Further, even if this worked, this
approach runs the risk of a
[TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use)
race condition, so it is not robust.

The solution, implemented in this PR, is to take the virtual execution
of the `apply_patch` CLI into an _actual_ execution using `codex
--codex-run-as-apply-patch PATCH`, which we can run under the sandbox
the user specified, just like any other `shell` call.

This, of course, assumes that the sandbox prevents writing through
symlinks as a mechanism to write to folders that are not in the writable
set configured by the sandbox. I verified this by testing the following
on both Mac and Linux:

```shell
#!/usr/bin/env bash
set -euo pipefail

# Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR?

# Codex is run in SANDBOX_DIR, so writes should be constrianed to this directory.
SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
# EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it.
EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)

echo "SANDBOX_DIR: $SANDBOX_DIR"
echo "EXPLOIT_DIR: $EXPLOIT_DIR"

cleanup() {
  # Only remove if it looks sane and still exists
  [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR"
  [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR"
}

trap cleanup EXIT

echo "I am the original content" > "${EXPLOIT_DIR}/original.txt"

# Drop the -s to test hard links.
ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt"

cat "${SANDBOX_DIR}/link-to-original.txt"

if [[ "$(uname)" == "Linux" ]]; then
    SANDBOX_SUBCOMMAND=landlock
else
    SANDBOX_SUBCOMMAND=seatbelt
fi

# Attempt the exploit
cd "${SANDBOX_DIR}"

codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true

cat "${EXPLOIT_DIR}/original.txt"
```

Admittedly, this change merits a proper integration test, but I think I
will have to do that in a follow-up PR.
											
										
										
											2025-07-30 16:45:08 -07:00
+								    if argv1 == CODEX_APPLY_PATCH_ARG1 {
-												fix: support special --codex-run-as-apply-patch arg (#1702)

This introduces some special behavior to the CLIs that are using the
`codex-arg0` crate where if `arg1` is `--codex-run-as-apply-patch`, then
it will run as if `apply_patch arg2` were invoked. This is important
because it means we can do things like:

```
SANDBOX_TYPE=landlock # or seatbelt for macOS
codex debug "${SANDBOX_TYPE}" -- codex --codex-run-as-apply-patch PATCH
```

which gives us a way to run `apply_patch` while ensuring it adheres to
the sandbox the user specified.

While it would be nice to use the `arg0` trick like we are currently
doing for `codex-linux-sandbox`, there is no way to specify the `arg0`
for the underlying command when running under `/usr/bin/sandbox-exec`,
so it will not work for us in this case.

Admittedly, we could have also supported this via a custom environment
variable (e.g., `CODEX_ARG0`), but since environment variables are
inherited by child processes, that seemed like a potentially leakier
abstraction.

This change, as well as our existing reliance on checking `arg0`, place
additional requirements on those who include `codex-core`. Its
`README.md` has been updated to reflect this.

While we could have just added an `apply-patch` subcommand to the
`codex` multitool CLI, that would not be sufficient for the standalone
`codex-exec` CLI, which is something that we distribute as part of our
GitHub releases for those who know they will not be using the TUI and
therefore prefer to use a slightly smaller executable:

https://github.com/openai/codex/releases/tag/rust-v0.10.0

To that end, this PR adds an integration test to ensure that the
`--codex-run-as-apply-patch` option works with the standalone
`codex-exec` CLI.

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1702).
* #1705
* #1703
* __->__ #1702
* #1698
* #1697
											
										
										
											2025-07-28 09:26:44 -07:00
+								        let patch_arg = args.next().and_then(|s| s.to_str().map(|s| s.to_owned()));
 								        let exit_code = match patch_arg {
 								            Some(patch_arg) => {
 								                let mut stdout = std::io::stdout();
 								                let mut stderr = std::io::stderr();
 								                match codex_apply_patch::apply_patch(&patch_arg, &mut stdout, &mut stderr) {
 								                    Ok(()) => 0,
 								                    Err(_) => 1,
 								                }
 								            }
 								            None => {
-												fix: run apply_patch calls through the sandbox (#1705)

Building on the work of https://github.com/openai/codex/pull/1702, this
changes how a shell call to `apply_patch` is handled.

Previously, a shell call to `apply_patch` was always handled in-process,
never leveraging a sandbox. To determine whether the `apply_patch`
operation could be auto-approved, the
`is_write_patch_constrained_to_writable_paths()` function would check if
all the paths listed in the paths were writable. If so, the agent would
apply the changes listed in the patch.

Unfortunately, this approach afforded a loophole: symlinks!

* For a soft link, we could fix this issue by tracing the link and
checking whether the target is in the set of writable paths, however...
* ...For a hard link, things are not as simple. We can run `stat FILE`
to see if the number of links is greater than 1, but then we would have
to do something potentially expensive like `find . -inum <inode_number>`
to find the other paths for `FILE`. Further, even if this worked, this
approach runs the risk of a
[TOCTOU](https://en.wikipedia.org/wiki/Time-of-check_to_time-of-use)
race condition, so it is not robust.

The solution, implemented in this PR, is to take the virtual execution
of the `apply_patch` CLI into an _actual_ execution using `codex
--codex-run-as-apply-patch PATCH`, which we can run under the sandbox
the user specified, just like any other `shell` call.

This, of course, assumes that the sandbox prevents writing through
symlinks as a mechanism to write to folders that are not in the writable
set configured by the sandbox. I verified this by testing the following
on both Mac and Linux:

```shell
#!/usr/bin/env bash
set -euo pipefail

# Can running a command in SANDBOX_DIR write a file in EXPLOIT_DIR?

# Codex is run in SANDBOX_DIR, so writes should be constrianed to this directory.
SANDBOX_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)
# EXPLOIT_DIR is outside of SANDBOX_DIR, so let's see if we can write to it.
EXPLOIT_DIR=$(mktemp -d -p "$HOME" sandboxtesttemp.XXXXXX)

echo "SANDBOX_DIR: $SANDBOX_DIR"
echo "EXPLOIT_DIR: $EXPLOIT_DIR"

cleanup() {
  # Only remove if it looks sane and still exists
  [[ -n "${SANDBOX_DIR:-}" && -d "$SANDBOX_DIR" ]] && rm -rf -- "$SANDBOX_DIR"
  [[ -n "${EXPLOIT_DIR:-}" && -d "$EXPLOIT_DIR" ]] && rm -rf -- "$EXPLOIT_DIR"
}

trap cleanup EXIT

echo "I am the original content" > "${EXPLOIT_DIR}/original.txt"

# Drop the -s to test hard links.
ln -s "${EXPLOIT_DIR}/original.txt" "${SANDBOX_DIR}/link-to-original.txt"

cat "${SANDBOX_DIR}/link-to-original.txt"

if [[ "$(uname)" == "Linux" ]]; then
    SANDBOX_SUBCOMMAND=landlock
else
    SANDBOX_SUBCOMMAND=seatbelt
fi

# Attempt the exploit
cd "${SANDBOX_DIR}"

codex debug "${SANDBOX_SUBCOMMAND}" bash -lc "echo pwned > ./link-to-original.txt" || true

cat "${EXPLOIT_DIR}/original.txt"
```

Admittedly, this change merits a proper integration test, but I think I
will have to do that in a follow-up PR.
											
										
										
											2025-07-30 16:45:08 -07:00
+								                eprintln!("Error: {CODEX_APPLY_PATCH_ARG1} requires a UTF-8 PATCH argument.");
-												fix: support special --codex-run-as-apply-patch arg (#1702)

This introduces some special behavior to the CLIs that are using the
`codex-arg0` crate where if `arg1` is `--codex-run-as-apply-patch`, then
it will run as if `apply_patch arg2` were invoked. This is important
because it means we can do things like:

```
SANDBOX_TYPE=landlock # or seatbelt for macOS
codex debug "${SANDBOX_TYPE}" -- codex --codex-run-as-apply-patch PATCH
```

which gives us a way to run `apply_patch` while ensuring it adheres to
the sandbox the user specified.

While it would be nice to use the `arg0` trick like we are currently
doing for `codex-linux-sandbox`, there is no way to specify the `arg0`
for the underlying command when running under `/usr/bin/sandbox-exec`,
so it will not work for us in this case.

Admittedly, we could have also supported this via a custom environment
variable (e.g., `CODEX_ARG0`), but since environment variables are
inherited by child processes, that seemed like a potentially leakier
abstraction.

This change, as well as our existing reliance on checking `arg0`, place
additional requirements on those who include `codex-core`. Its
`README.md` has been updated to reflect this.

While we could have just added an `apply-patch` subcommand to the
`codex` multitool CLI, that would not be sufficient for the standalone
`codex-exec` CLI, which is something that we distribute as part of our
GitHub releases for those who know they will not be using the TUI and
therefore prefer to use a slightly smaller executable:

https://github.com/openai/codex/releases/tag/rust-v0.10.0

To that end, this PR adds an integration test to ensure that the
`--codex-run-as-apply-patch` option works with the standalone
`codex-exec` CLI.

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/1702).
* #1705
* #1703
* __->__ #1702
* #1698
* #1697
											
										
										
											2025-07-28 09:26:44 -07:00
 								            }
 								        };
 								        std::process::exit(exit_code);
 								    }
-												fix: move arg0 handling out of codex-linux-sandbox and into its own crate (#1697)


											
										
										
											2025-07-28 08:31:24 -07:00
+								    // This modifies the environment, which is not thread-safe, so do this
 								    // before creating any threads/the Tokio runtime.
 								    load_dotenv();
-												feat: use the arg0 trick with apply_patch (#2646)

Historically, Codex CLI has treated `apply_patch` (and its sometimes
misspelling, `applypatch`) as a "virtual CLI," intercepting it when it
appears as the first arg to `command` for the `"container.exec",
`"shell"`, or `"local_shell"` tools.

This approach has a known limitation where if, say, the model created a
Python script that runs `apply_patch` and then tried to run the Python
script, we have no insight as to what the model is trying to do and the
Python Script would fail because `apply_patch` was never really on the
`PATH`.

One way to solve this problem is to require users to install an
`apply_patch` executable alongside the `codex` executable (or at least
put it someplace where Codex can discover it). Though to keep Codex CLI
as a standalone executable, we exploit "the arg0 trick" where we create
a temporary directory with an entry named `apply_patch` and prepend that
directory to the `PATH` for the duration of the invocation of Codex.

- On UNIX, `apply_patch` is a symlink to `codex`, which now changes its
behavior to behave like `apply_patch` if arg0 is `apply_patch` (or
`applypatch`)
- On Windows, `apply_patch.bat` is a batch script that runs `codex
--codex-run-as-apply-patch %*`, as Codex also changes its behavior if
the first argument is `--codex-run-as-apply-patch`.
											
										
										
											2025-08-24 14:35:51 -07:00
+								    // Retain the TempDir so it exists for the lifetime of the invocation of
 								    // this executable. Admittedly, we could invoke `keep()` on it, but it
 								    // would be nice to avoid leaving temporary directories behind, if possible.
 								    let _path_entry = match prepend_path_entry_for_apply_patch() {
 								        Ok(path_entry) => Some(path_entry),
 								        Err(err) => {
 								            // It is possible that Codex will proceed successfully even if
 								            // updating the PATH fails, so warn the user and move on.
 								            eprintln!("WARNING: proceeding, even though we could not update PATH: {err}");
 								            None
 								        }
 								    };
-												fix: move arg0 handling out of codex-linux-sandbox and into its own crate (#1697)


											
										
										
											2025-07-28 08:31:24 -07:00
+								    // Regular invocation – create a Tokio runtime and execute the provided
 								    // async entry-point.
 								    let runtime = tokio::runtime::Runtime::new()?;
 								    runtime.block_on(async move {
 								        let codex_linux_sandbox_exe: Option<PathBuf> = if cfg!(target_os = "linux") {
 								            std::env::current_exe().ok()
 								        } else {
 								            None
 								        };
 								        main_fn(codex_linux_sandbox_exe).await
 								    })
 								}
-												fix: do not allow dotenv to create/modify environment variables starting with CODEX_ (#2308)

This ensures Codex cannot drop a `.env` file with a value of
`CODEX_HOME` that points to a folder that Codex can control.
											
										
										
											2025-08-14 13:57:15 -07:00
+								const ILLEGAL_ENV_VAR_PREFIX: &str = "CODEX_";
-												fix: move arg0 handling out of codex-linux-sandbox and into its own crate (#1697)


											
										
										
											2025-07-28 08:31:24 -07:00
+								/// Load env vars from ~/.codex/.env and `$(pwd)/.env`.
-												fix: do not allow dotenv to create/modify environment variables starting with CODEX_ (#2308)

This ensures Codex cannot drop a `.env` file with a value of
`CODEX_HOME` that points to a folder that Codex can control.
											
										
										
											2025-08-14 13:57:15 -07:00
+								///
 								/// Security: Do not allow `.env` files to create or modify any variables
 								/// with names starting with `CODEX_`.
-												fix: move arg0 handling out of codex-linux-sandbox and into its own crate (#1697)


											
										
										
											2025-07-28 08:31:24 -07:00
+								fn load_dotenv() {
-												chore: upgrade to Rust 1.89 (#2465)

Codex created this PR from the following prompt:

> upgrade this entire repo to Rust 1.89. Note that this requires
updating codex-rs/rust-toolchain.toml as well as the workflows in
.github/. Make sure that things are "clippy clean" as this change will
likely uncover new Clippy errors. `just fmt` and `cargo clippy --tests`
are sufficient to check for correctness

Note this modifies a lot of lines because it folds nested `if`
statements using `&&`.

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/2465).
* #2467
* __->__ #2465
											
										
										
											2025-08-19 13:22:02 -07:00
+								    if let Ok(codex_home) = codex_core::config::find_codex_home()
 								        && let Ok(iter) = dotenvy::from_path_iter(codex_home.join(".env"))
 								    {
 								        set_filtered(iter);
-												fix: do not allow dotenv to create/modify environment variables starting with CODEX_ (#2308)

This ensures Codex cannot drop a `.env` file with a value of
`CODEX_HOME` that points to a folder that Codex can control.
											
										
										
											2025-08-14 13:57:15 -07:00
+								    }
 								    if let Ok(iter) = dotenvy::dotenv_iter() {
 								        set_filtered(iter);
 								    }
 								}
 								/// Helper to set vars from a dotenvy iterator while filtering out `CODEX_` keys.
 								fn set_filtered<I>(iter: I)
 								where
 								    I: IntoIterator<Item = Result<(String, String), dotenvy::Error>>,
 								{
 								    for (key, value) in iter.into_iter().flatten() {
 								        if !key.to_ascii_uppercase().starts_with(ILLEGAL_ENV_VAR_PREFIX) {
 								            // It is safe to call set_var() because our process is
 								            // single-threaded at this point in its execution.
 								            unsafe { std::env::set_var(&key, &value) };
 								        }
-												fix: move arg0 handling out of codex-linux-sandbox and into its own crate (#1697)


											
										
										
											2025-07-28 08:31:24 -07:00
+								    }
 								}
-												feat: use the arg0 trick with apply_patch (#2646)

Historically, Codex CLI has treated `apply_patch` (and its sometimes
misspelling, `applypatch`) as a "virtual CLI," intercepting it when it
appears as the first arg to `command` for the `"container.exec",
`"shell"`, or `"local_shell"` tools.

This approach has a known limitation where if, say, the model created a
Python script that runs `apply_patch` and then tried to run the Python
script, we have no insight as to what the model is trying to do and the
Python Script would fail because `apply_patch` was never really on the
`PATH`.

One way to solve this problem is to require users to install an
`apply_patch` executable alongside the `codex` executable (or at least
put it someplace where Codex can discover it). Though to keep Codex CLI
as a standalone executable, we exploit "the arg0 trick" where we create
a temporary directory with an entry named `apply_patch` and prepend that
directory to the `PATH` for the duration of the invocation of Codex.

- On UNIX, `apply_patch` is a symlink to `codex`, which now changes its
behavior to behave like `apply_patch` if arg0 is `apply_patch` (or
`applypatch`)
- On Windows, `apply_patch.bat` is a batch script that runs `codex
--codex-run-as-apply-patch %*`, as Codex also changes its behavior if
the first argument is `--codex-run-as-apply-patch`.
											
										
										
											2025-08-24 14:35:51 -07:00
 								/// Creates a temporary directory with either:
 								///
 								/// - UNIX: `apply_patch` symlink to the current executable
 								/// - WINDOWS: `apply_patch.bat` batch script to invoke the current executable
 								///   with the "secret" --codex-run-as-apply-patch flag.
 								///
 								/// This temporary directory is prepended to the PATH environment variable so
 								/// that `apply_patch` can be on the PATH without requiring the user to
 								/// install a separate `apply_patch` executable, simplifying the deployment of
 								/// Codex CLI.
 								///
 								/// IMPORTANT: This function modifies the PATH environment variable, so it MUST
 								/// be called before multiple threads are spawned.
 								fn prepend_path_entry_for_apply_patch() -> std::io::Result<TempDir> {
 								    let temp_dir = TempDir::new()?;
 								    let path = temp_dir.path();
 								    for filename in &[APPLY_PATCH_ARG0, MISSPELLED_APPLY_PATCH_ARG0] {
 								        let exe = std::env::current_exe()?;
 								        #[cfg(unix)]
 								        {
 								            let link = path.join(filename);
 								            symlink(&exe, &link)?;
 								        }
 								        #[cfg(windows)]
 								        {
 								            let batch_script = path.join(format!("{filename}.bat"));
 								            std::fs::write(
 								                &batch_script,
 								                format!(
 								                    r#"@echo off
 								"{}" {CODEX_APPLY_PATCH_ARG1} %*
 								"#,
 								                    exe.display()
 								                ),
 								            )?;
 								        }
 								    }
 								    #[cfg(unix)]
 								    const PATH_SEPARATOR: &str = ":";
 								    #[cfg(windows)]
 								    const PATH_SEPARATOR: &str = ";";
 								    let path_element = path.display();
 								    let updated_path_env_var = match std::env::var("PATH") {
 								        Ok(existing_path) => {
 								            format!("{path_element}{PATH_SEPARATOR}{existing_path}")
 								        }
 								        Err(_) => {
 								            format!("{path_element}")
 								        }
 								    };
 								    unsafe {
 								        std::env::set_var("PATH", updated_path_env_var);
 								    }
 								    Ok(temp_dir)
 								}