codex-rs/core/src/exec_linux.rs

use std::io;
use std::path::Path;
use std::sync::Arc;

use crate::error::CodexErr;
use crate::error::Result;
use crate::exec::ExecParams;
use crate::exec::RawExecToolCallOutput;
use crate::exec::StdioPolicy;
use crate::exec::consume_truncated_output;
use crate::exec::spawn_child_async;
use crate::protocol::SandboxPolicy;

use tokio::sync::Notify;

pub fn exec_linux(
    params: ExecParams,
    ctrl_c: Arc<Notify>,
    sandbox_policy: &SandboxPolicy,
) -> Result<RawExecToolCallOutput> {
    // Allow READ on /
    // Allow WRITE on /dev/null
    let ctrl_c_copy = ctrl_c.clone();
    let sandbox_policy = sandbox_policy.clone();

    // Isolate thread to run the sandbox from
    let tool_call_output = std::thread::spawn(move || {
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()?;

        rt.block_on(async {
            let ExecParams {
                command,
                cwd,
                timeout_ms,
            } = params;
            apply_sandbox_policy_to_current_thread(&sandbox_policy, &cwd)?;
            let child = spawn_child_async(
                command,
                cwd,
                &sandbox_policy,
                StdioPolicy::RedirectForShellTool,
            )
            .await?;
            consume_truncated_output(child, ctrl_c_copy, timeout_ms).await
        })
    })
    .join();

    match tool_call_output {
        Ok(Ok(output)) => Ok(output),
        Ok(Err(e)) => Err(e),
        Err(e) => Err(CodexErr::Io(io::Error::new(
            io::ErrorKind::Other,
            format!("thread join failed: {e:?}"),
        ))),
    }
}

#[cfg(target_os = "linux")]
pub fn apply_sandbox_policy_to_current_thread(
    sandbox_policy: &SandboxPolicy,
    cwd: &Path,
) -> Result<()> {
    crate::landlock::apply_sandbox_policy_to_current_thread(sandbox_policy, cwd)
}

/// Fallback used on every target other than Linux.
///
/// Both arguments are ignored. The call always fails with a `CodexErr::Io`
/// of kind `io::ErrorKind::InvalidInput`, so this module still compiles on
/// other platforms while reporting that the sandbox is unavailable.
#[cfg(not(target_os = "linux"))]
pub fn apply_sandbox_policy_to_current_thread(
    _sandbox_policy: &SandboxPolicy,
    _cwd: &Path,
) -> Result<()> {
    let unsupported = io::Error::new(
        io::ErrorKind::InvalidInput,
        "linux sandbox is not supported on this platform",
    );
    Err(CodexErr::Io(unsupported))
}
feat: experimental env var: CODEX_SANDBOX_NETWORK_DISABLED (#879) When using Codex to develop Codex itself, I noticed that sometimes it would try to add `#[ignore]` to the following tests: ``` keeps_previous_response_id_between_tasks() retries_on_early_close() ``` Both of these tests start a `MockServer` that launches an HTTP server on an ephemeral port and requires network access to hit it, which the Seatbelt policy associated with `--full-auto` correctly denies. If I wasn't paying attention to the code that Codex was generating, one of these `#[ignore]` annotations could have slipped into the codebase, effectively disabling the test for everyone. To that end, this PR enables an experimental environment variable named `CODEX_SANDBOX_NETWORK_DISABLED` that is set to `1` if the `SandboxPolicy` used to spawn the process does not have full network access. I say it is "experimental" because I'm not convinced this API is quite right, but we need to start somewhere. (It might be more appropriate to have an env var like `CODEX_SANDBOX=full-auto`, but the challenge is that our newer `SandboxPolicy` abstraction does not map to a simple set of enums like in the TypeScript CLI.) We leverage this new functionality by adding the following code to the aforementioned tests as a way to "dynamically disable" them: ```rust if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { println!( "Skipping test because it cannot execute when network is disabled in a Codex sandbox." 
); return; } ``` We can use the `debug seatbelt --full-auto` command to verify that `cargo test` fails when run under Seatbelt prior to this change: ``` $ cargo run --bin codex -- debug seatbelt --full-auto -- cargo test ---- keeps_previous_response_id_between_tasks stdout ---- thread 'keeps_previous_response_id_between_tasks' panicked at /Users/mbolin/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/wiremock-0.6.3/src/mock_server/builder.rs:107:46: Failed to bind an OS port for a mock server.: Os { code: 1, kind: PermissionDenied, message: "Operation not permitted" } note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace failures: keeps_previous_response_id_between_tasks test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s error: test failed, to rerun pass `-p codex-core --test previous_response_id` ``` Though after this change, the above command succeeds! This means that, going forward, when Codex operates on Codex itself, when it runs `cargo test`, only "real failures" should cause the command to fail. As part of this change, I decided to tighten up the codepaths for running `exec()` for shell tool calls. In particular, we do it in `core` for the main Codex business logic itself, but we also expose this logic via `debug` subcommands in the CLI in the `cli` crate. The logic for the `debug` subcommands was not quite as faithful to the true business logic as I liked, so I: * refactored a bit of the Linux code, splitting `linux.rs` into `linux_exec.rs` and `landlock.rs` in the `core` crate. * gating less code behind `#[cfg(target_os = "linux")]` because such code does not get built by default when I develop on Mac, which means I either have to build the code in Docker or wait for CI signal * introduced `macro_rules! configure_command` in `exec.rs` so we can have both sync and async versions of this code. 
The synchronous version seems more appropriate for straight threads or potentially fork/exec. 2025-05-09 18:29:34 -07:00			`use std::io;`
			`use std::path::Path;`
			`use std::sync::Arc;`

			`use crate::error::CodexErr;`
			`use crate::error::Result;`
			`use crate::exec::ExecParams;`
			`use crate::exec::RawExecToolCallOutput;`
			`use crate::exec::StdioPolicy;`
			`use crate::exec::consume_truncated_output;`
			`use crate::exec::spawn_child_async;`
			`use crate::protocol::SandboxPolicy;`

			`use tokio::sync::Notify;`

			`pub fn exec_linux(`
			`params: ExecParams,`
			`ctrl_c: Arc<Notify>,`
			`sandbox_policy: &SandboxPolicy,`
			`) -> Result<RawExecToolCallOutput> {`
			`// Allow READ on /`
			`// Allow WRITE on /dev/null`
			`let ctrl_c_copy = ctrl_c.clone();`
			`let sandbox_policy = sandbox_policy.clone();`

			`// Isolate thread to run the sandbox from`
			`let tool_call_output = std::thread::spawn(move \|\| {`
			`let rt = tokio::runtime::Builder::new_current_thread()`
			`.enable_all()`
Disallow expect via lints (#865) Adds `expect()` as a denied lint. Same deal applies with `unwrap()` where we now need to put `#[expect(...` on ones that we legit want. Took care to enable `expect()` in test contexts. # Tests ``` cargo fmt cargo clippy --all-features --all-targets --no-deps -- -D warnings cargo test ``` 2025-05-12 08:45:46 -07:00			`.build()?;`
feat: experimental env var: CODEX_SANDBOX_NETWORK_DISABLED (#879) When using Codex to develop Codex itself, I noticed that sometimes it would try to add `#[ignore]` to the following tests: ``` keeps_previous_response_id_between_tasks() retries_on_early_close() ``` Both of these tests start a `MockServer` that launches an HTTP server on an ephemeral port and requires network access to hit it, which the Seatbelt policy associated with `--full-auto` correctly denies. If I wasn't paying attention to the code that Codex was generating, one of these `#[ignore]` annotations could have slipped into the codebase, effectively disabling the test for everyone. To that end, this PR enables an experimental environment variable named `CODEX_SANDBOX_NETWORK_DISABLED` that is set to `1` if the `SandboxPolicy` used to spawn the process does not have full network access. I say it is "experimental" because I'm not convinced this API is quite right, but we need to start somewhere. (It might be more appropriate to have an env var like `CODEX_SANDBOX=full-auto`, but the challenge is that our newer `SandboxPolicy` abstraction does not map to a simple set of enums like in the TypeScript CLI.) We leverage this new functionality by adding the following code to the aforementioned tests as a way to "dynamically disable" them: ```rust if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { println!( "Skipping test because it cannot execute when network is disabled in a Codex sandbox." 
); return; } ``` We can use the `debug seatbelt --full-auto` command to verify that `cargo test` fails when run under Seatbelt prior to this change: ``` $ cargo run --bin codex -- debug seatbelt --full-auto -- cargo test ---- keeps_previous_response_id_between_tasks stdout ---- thread 'keeps_previous_response_id_between_tasks' panicked at /Users/mbolin/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/wiremock-0.6.3/src/mock_server/builder.rs:107:46: Failed to bind an OS port for a mock server.: Os { code: 1, kind: PermissionDenied, message: "Operation not permitted" } note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace failures: keeps_previous_response_id_between_tasks test result: FAILED. 0 passed; 1 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s error: test failed, to rerun pass `-p codex-core --test previous_response_id` ``` Though after this change, the above command succeeds! This means that, going forward, when Codex operates on Codex itself, when it runs `cargo test`, only "real failures" should cause the command to fail. As part of this change, I decided to tighten up the codepaths for running `exec()` for shell tool calls. In particular, we do it in `core` for the main Codex business logic itself, but we also expose this logic via `debug` subcommands in the CLI in the `cli` crate. The logic for the `debug` subcommands was not quite as faithful to the true business logic as I liked, so I: * refactored a bit of the Linux code, splitting `linux.rs` into `linux_exec.rs` and `landlock.rs` in the `core` crate. * gating less code behind `#[cfg(target_os = "linux")]` because such code does not get built by default when I develop on Mac, which means I either have to build the code in Docker or wait for CI signal * introduced `macro_rules! configure_command` in `exec.rs` so we can have both sync and async versions of this code. 
The synchronous version seems more appropriate for straight threads or potentially fork/exec. 2025-05-09 18:29:34 -07:00
			`rt.block_on(async {`
			`let ExecParams {`
			`command,`
			`cwd,`
			`timeout_ms,`
			`} = params;`
			`apply_sandbox_policy_to_current_thread(&sandbox_policy, &cwd)?;`
			`let child = spawn_child_async(`
			`command,`
			`cwd,`
			`&sandbox_policy,`
			`StdioPolicy::RedirectForShellTool,`
			`)`
			`.await?;`
			`consume_truncated_output(child, ctrl_c_copy, timeout_ms).await`
			`})`
			`})`
			`.join();`

			`match tool_call_output {`
			`Ok(Ok(output)) => Ok(output),`
			`Ok(Err(e)) => Err(e),`
			`Err(e) => Err(CodexErr::Io(io::Error::new(`
			`io::ErrorKind::Other,`
			`format!("thread join failed: {e:?}"),`
			`))),`
			`}`
			`}`

			`#[cfg(target_os = "linux")]`
			`pub fn apply_sandbox_policy_to_current_thread(`
			`sandbox_policy: &SandboxPolicy,`
			`cwd: &Path,`
			`) -> Result<()> {`
			`crate::landlock::apply_sandbox_policy_to_current_thread(sandbox_policy, cwd)`
			`}`

			`#[cfg(not(target_os = "linux"))]`
			`pub fn apply_sandbox_policy_to_current_thread(`
			`_sandbox_policy: &SandboxPolicy,`
			`_cwd: &Path,`
			`) -> Result<()> {`
			`Err(CodexErr::Io(io::Error::new(`
			`io::ErrorKind::InvalidInput,`
			`"linux sandbox is not supported on this platform",`
			`)))`
			`}`