From 5a0ad5ab8f056e917063cab4bc5031a8b5e9f3ec Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Thu, 31 Jul 2025 13:11:47 -0700 Subject: [PATCH] chore: refactor exec.rs: create separate seatbelt.rs and spawn.rs files (#1762) At 550 lines, `exec.rs` was a bit large. In particular, I found it hard to locate the Seatbelt-related code quickly without a file with `seatbelt` in the name, so this refactors things so: - `spawn_command_under_seatbelt()` and dependent code moves to a new `seatbelt.rs` file - `spawn_child_async()` and dependent code moves to a new `spawn.rs` file --- codex-rs/cli/src/debug_sandbox.rs | 4 +- codex-rs/core/src/exec.rs | 188 +-------------------- codex-rs/core/src/lib.rs | 2 + codex-rs/core/src/seatbelt.rs | 96 +++++++++++ codex-rs/core/src/spawn.rs | 102 +++++++++++ codex-rs/core/tests/cli_stream.rs | 2 +- codex-rs/core/tests/client.rs | 2 +- codex-rs/core/tests/stream_no_completed.rs | 2 +- codex-rs/mcp-server/tests/codex_tool.rs | 2 +- codex-rs/mcp-server/tests/interrupt.rs | 2 +- 10 files changed, 210 insertions(+), 192 deletions(-) create mode 100644 codex-rs/core/src/seatbelt.rs create mode 100644 codex-rs/core/src/spawn.rs diff --git a/codex-rs/cli/src/debug_sandbox.rs b/codex-rs/cli/src/debug_sandbox.rs index 905b7461..7f0983cb 100644 --- a/codex-rs/cli/src/debug_sandbox.rs +++ b/codex-rs/cli/src/debug_sandbox.rs @@ -4,10 +4,10 @@ use codex_common::CliConfigOverrides; use codex_core::config::Config; use codex_core::config::ConfigOverrides; use codex_core::config_types::SandboxMode; -use codex_core::exec::StdioPolicy; use codex_core::exec::spawn_command_under_linux_sandbox; -use codex_core::exec::spawn_command_under_seatbelt; use codex_core::exec_env::create_env; +use codex_core::seatbelt::spawn_command_under_seatbelt; +use codex_core::spawn::StdioPolicy; use crate::LandlockCommand; use crate::SeatbeltCommand; diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 230c4ec1..06416e67 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -6,7 +6,6 @@ use std::io; use std::path::Path; use std::path::PathBuf; use std::process::ExitStatus; -use std::process::Stdio; use std::sync::Arc; use std::time::Duration; use std::time::Instant; @@ -15,14 +14,15 @@ use tokio::io::AsyncRead; use tokio::io::AsyncReadExt; use tokio::io::BufReader; use tokio::process::Child; -use tokio::process::Command; use tokio::sync::Notify; -use tracing::trace; use crate::error::CodexErr; use crate::error::Result; use crate::error::SandboxErr; use crate::protocol::SandboxPolicy; +use crate::seatbelt::spawn_command_under_seatbelt; +use crate::spawn::StdioPolicy; +use crate::spawn::spawn_child_async; // Maximum we send for each stream, which is either: // - 10KiB OR @@ -37,24 +37,6 @@ const DEFAULT_TIMEOUT_MS: u64 = 10_000; const SIGKILL_CODE: i32 = 9; const TIMEOUT_CODE: i32 = 64; -const MACOS_SEATBELT_BASE_POLICY: &str = include_str!("seatbelt_base_policy.sbpl"); - -/// When working with `sandbox-exec`, only consider `sandbox-exec` in `/usr/bin` -/// to defend against an attacker trying to inject a malicious version on the -/// PATH. If /usr/bin/sandbox-exec has been tampered with, then the attacker -/// already has root access. -const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec"; - -/// Experimental environment variable that will be set to some non-empty value -/// if both of the following are true: -/// -/// 1. The process was spawned by Codex as part of a shell tool call. -/// 2. SandboxPolicy.has_full_network_access() was false for the tool call. -/// -/// We may try to have just one environment variable for all sandboxing -/// attributes, so this may change in the future. -pub const CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR: &str = "CODEX_SANDBOX_NETWORK_DISABLED"; - #[derive(Debug, Clone)] pub struct ExecParams { pub command: Vec, @@ -168,27 +150,6 @@ pub async fn process_exec_tool_call( } } -pub async fn spawn_command_under_seatbelt( - command: Vec, - sandbox_policy: &SandboxPolicy, - cwd: PathBuf, - stdio_policy: StdioPolicy, - env: HashMap, -) -> std::io::Result { - let args = create_seatbelt_command_args(command, sandbox_policy, &cwd); - let arg0 = None; - spawn_child_async( - PathBuf::from(MACOS_PATH_TO_SEATBELT_EXECUTABLE), - args, - arg0, - cwd, - sandbox_policy, - stdio_policy, - env, - ) - .await -} - /// Spawn a shell tool command under the Linux Landlock+seccomp sandbox helper /// (codex-linux-sandbox). /// @@ -248,65 +209,6 @@ fn create_linux_sandbox_command_args( linux_cmd } -fn create_seatbelt_command_args( - command: Vec, - sandbox_policy: &SandboxPolicy, - cwd: &Path, -) -> Vec { - let (file_write_policy, extra_cli_args) = { - if sandbox_policy.has_full_disk_write_access() { - // Allegedly, this is more permissive than `(allow file-write*)`. - ( - r#"(allow file-write* (regex #"^/"))"#.to_string(), - Vec::::new(), - ) - } else { - let writable_roots = sandbox_policy.get_writable_roots_with_cwd(cwd); - let (writable_folder_policies, cli_args): (Vec, Vec) = writable_roots - .iter() - .enumerate() - .map(|(index, root)| { - let param_name = format!("WRITABLE_ROOT_{index}"); - let policy: String = format!("(subpath (param \"{param_name}\"))"); - let cli_arg = format!("-D{param_name}={}", root.to_string_lossy()); - (policy, cli_arg) - }) - .unzip(); - if writable_folder_policies.is_empty() { - ("".to_string(), Vec::::new()) - } else { - let file_write_policy = format!( - "(allow file-write*\n{}\n)", - writable_folder_policies.join(" ") - ); - (file_write_policy, cli_args) - } - } - }; - - let file_read_policy = if sandbox_policy.has_full_disk_read_access() { - "; allow read-only file operations\n(allow file-read*)" - } else { - "" - }; - - // TODO(mbolin): apply_patch calls must also honor the SandboxPolicy. - let network_policy = if sandbox_policy.has_full_network_access() { - "(allow network-outbound)\n(allow network-inbound)\n(allow system-socket)" - } else { - "" - }; - - let full_policy = format!( - "{MACOS_SEATBELT_BASE_POLICY}\n{file_read_policy}\n{file_write_policy}\n{network_policy}" - ); - let mut seatbelt_args: Vec = vec!["-p".to_string(), full_policy]; - seatbelt_args.extend(extra_cli_args); - seatbelt_args.push("--".to_string()); - seatbelt_args.extend(command); - seatbelt_args -} - #[derive(Debug)] pub struct RawExecToolCallOutput { pub exit_status: ExitStatus, @@ -352,90 +254,6 @@ async fn exec( consume_truncated_output(child, ctrl_c, timeout_ms).await } -#[derive(Debug, Clone, Copy)] -pub enum StdioPolicy { - RedirectForShellTool, - Inherit, -} - -/// Spawns the appropriate child process for the ExecParams and SandboxPolicy, -/// ensuring the args and environment variables used to create the `Command` -/// (and `Child`) honor the configuration. -/// -/// For now, we take `SandboxPolicy` as a parameter to spawn_child() because -/// we need to determine whether to set the -/// `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` environment variable. -async fn spawn_child_async( - program: PathBuf, - args: Vec, - #[cfg_attr(not(unix), allow(unused_variables))] arg0: Option<&str>, - cwd: PathBuf, - sandbox_policy: &SandboxPolicy, - stdio_policy: StdioPolicy, - env: HashMap, -) -> std::io::Result { - trace!( - "spawn_child_async: {program:?} {args:?} {arg0:?} {cwd:?} {sandbox_policy:?} {stdio_policy:?} {env:?}" - ); - - let mut cmd = Command::new(&program); - #[cfg(unix)] - cmd.arg0(arg0.map_or_else(|| program.to_string_lossy().to_string(), String::from)); - cmd.args(args); - cmd.current_dir(cwd); - cmd.env_clear(); - cmd.envs(env); - - if !sandbox_policy.has_full_network_access() { - cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1"); - } - - // If this Codex process dies (including being killed via SIGKILL), we want - // any child processes that were spawned as part of a `"shell"` tool call - // to also be terminated. - - // This relies on prctl(2), so it only works on Linux. - #[cfg(target_os = "linux")] - unsafe { - cmd.pre_exec(|| { - // This prctl call effectively requests, "deliver SIGTERM when my - // current parent dies." - if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) == -1 { - return Err(io::Error::last_os_error()); - } - - // Though if there was a race condition and this pre_exec() block is - // run _after_ the parent (i.e., the Codex process) has already - // exited, then the parent is the _init_ process (which will never - // die), so we should just terminate the child process now. - if libc::getppid() == 1 { - libc::raise(libc::SIGTERM); - } - Ok(()) - }); - } - - match stdio_policy { - StdioPolicy::RedirectForShellTool => { - // Do not create a file descriptor for stdin because otherwise some - // commands may hang forever waiting for input. For example, ripgrep has - // a heuristic where it may try to read from stdin as explained here: - // https://github.com/BurntSushi/ripgrep/blob/e2362d4d5185d02fa857bf381e7bd52e66fafc73/crates/core/flags/hiargs.rs#L1101-L1103 - cmd.stdin(Stdio::null()); - - cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); - } - StdioPolicy::Inherit => { - // Inherit stdin, stdout, and stderr from the parent process. - cmd.stdin(Stdio::inherit()) - .stdout(Stdio::inherit()) - .stderr(Stdio::inherit()); - } - } - - cmd.kill_on_drop(true).spawn() -} - /// Consumes the output of a child process, truncating it so it is suitable for /// use as the output of a `shell` tool call. Also enforces specified timeout. pub(crate) async fn consume_truncated_output( diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs index 054abd74..a33e185a 100644 --- a/codex-rs/core/src/lib.rs +++ b/codex-rs/core/src/lib.rs @@ -39,7 +39,9 @@ mod project_doc; pub mod protocol; mod rollout; mod safety; +pub mod seatbelt; pub mod shell; +pub mod spawn; mod user_notification; pub mod util; diff --git a/codex-rs/core/src/seatbelt.rs b/codex-rs/core/src/seatbelt.rs new file mode 100644 index 00000000..be2acb1b --- /dev/null +++ b/codex-rs/core/src/seatbelt.rs @@ -0,0 +1,96 @@ +use std::collections::HashMap; +use std::path::Path; +use std::path::PathBuf; +use tokio::process::Child; + +use crate::protocol::SandboxPolicy; +use crate::spawn::StdioPolicy; +use crate::spawn::spawn_child_async; + +const MACOS_SEATBELT_BASE_POLICY: &str = include_str!("seatbelt_base_policy.sbpl"); + +/// When working with `sandbox-exec`, only consider `sandbox-exec` in `/usr/bin` +/// to defend against an attacker trying to inject a malicious version on the +/// PATH. If /usr/bin/sandbox-exec has been tampered with, then the attacker +/// already has root access. +const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec"; + +pub async fn spawn_command_under_seatbelt( + command: Vec, + sandbox_policy: &SandboxPolicy, + cwd: PathBuf, + stdio_policy: StdioPolicy, + env: HashMap, +) -> std::io::Result { + let args = create_seatbelt_command_args(command, sandbox_policy, &cwd); + let arg0 = None; + spawn_child_async( + PathBuf::from(MACOS_PATH_TO_SEATBELT_EXECUTABLE), + args, + arg0, + cwd, + sandbox_policy, + stdio_policy, + env, + ) + .await +} + +fn create_seatbelt_command_args( + command: Vec, + sandbox_policy: &SandboxPolicy, + cwd: &Path, +) -> Vec { + let (file_write_policy, extra_cli_args) = { + if sandbox_policy.has_full_disk_write_access() { + // Allegedly, this is more permissive than `(allow file-write*)`. + ( + r#"(allow file-write* (regex #"^/"))"#.to_string(), + Vec::::new(), + ) + } else { + let writable_roots = sandbox_policy.get_writable_roots_with_cwd(cwd); + let (writable_folder_policies, cli_args): (Vec, Vec) = writable_roots + .iter() + .enumerate() + .map(|(index, root)| { + let param_name = format!("WRITABLE_ROOT_{index}"); + let policy: String = format!("(subpath (param \"{param_name}\"))"); + let cli_arg = format!("-D{param_name}={}", root.to_string_lossy()); + (policy, cli_arg) + }) + .unzip(); + if writable_folder_policies.is_empty() { + ("".to_string(), Vec::::new()) + } else { + let file_write_policy = format!( + "(allow file-write*\n{}\n)", + writable_folder_policies.join(" ") + ); + (file_write_policy, cli_args) + } + } + }; + + let file_read_policy = if sandbox_policy.has_full_disk_read_access() { + "; allow read-only file operations\n(allow file-read*)" + } else { + "" + }; + + // TODO(mbolin): apply_patch calls must also honor the SandboxPolicy. + let network_policy = if sandbox_policy.has_full_network_access() { + "(allow network-outbound)\n(allow network-inbound)\n(allow system-socket)" + } else { + "" + }; + + let full_policy = format!( + "{MACOS_SEATBELT_BASE_POLICY}\n{file_read_policy}\n{file_write_policy}\n{network_policy}" + ); + let mut seatbelt_args: Vec = vec!["-p".to_string(), full_policy]; + seatbelt_args.extend(extra_cli_args); + seatbelt_args.push("--".to_string()); + seatbelt_args.extend(command); + seatbelt_args +} diff --git a/codex-rs/core/src/spawn.rs b/codex-rs/core/src/spawn.rs new file mode 100644 index 00000000..9fde2653 --- /dev/null +++ b/codex-rs/core/src/spawn.rs @@ -0,0 +1,102 @@ +use std::collections::HashMap; +use std::path::PathBuf; +use std::process::Stdio; +use tokio::process::Child; +use tokio::process::Command; +use tracing::trace; + +use crate::protocol::SandboxPolicy; + +/// Experimental environment variable that will be set to some non-empty value +/// if both of the following are true: +/// +/// 1. The process was spawned by Codex as part of a shell tool call. +/// 2. SandboxPolicy.has_full_network_access() was false for the tool call. +/// +/// We may try to have just one environment variable for all sandboxing +/// attributes, so this may change in the future. +pub const CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR: &str = "CODEX_SANDBOX_NETWORK_DISABLED"; + +#[derive(Debug, Clone, Copy)] +pub enum StdioPolicy { + RedirectForShellTool, + Inherit, +} + +/// Spawns the appropriate child process for the ExecParams and SandboxPolicy, +/// ensuring the args and environment variables used to create the `Command` +/// (and `Child`) honor the configuration. +/// +/// For now, we take `SandboxPolicy` as a parameter to spawn_child() because +/// we need to determine whether to set the +/// `CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR` environment variable. +pub(crate) async fn spawn_child_async( + program: PathBuf, + args: Vec, + #[cfg_attr(not(unix), allow(unused_variables))] arg0: Option<&str>, + cwd: PathBuf, + sandbox_policy: &SandboxPolicy, + stdio_policy: StdioPolicy, + env: HashMap, +) -> std::io::Result { + trace!( + "spawn_child_async: {program:?} {args:?} {arg0:?} {cwd:?} {sandbox_policy:?} {stdio_policy:?} {env:?}" + ); + + let mut cmd = Command::new(&program); + #[cfg(unix)] + cmd.arg0(arg0.map_or_else(|| program.to_string_lossy().to_string(), String::from)); + cmd.args(args); + cmd.current_dir(cwd); + cmd.env_clear(); + cmd.envs(env); + + if !sandbox_policy.has_full_network_access() { + cmd.env(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR, "1"); + } + + // If this Codex process dies (including being killed via SIGKILL), we want + // any child processes that were spawned as part of a `"shell"` tool call + // to also be terminated. + + // This relies on prctl(2), so it only works on Linux. + #[cfg(target_os = "linux")] + unsafe { + cmd.pre_exec(|| { + // This prctl call effectively requests, "deliver SIGTERM when my + // current parent dies." + if libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM) == -1 { + return Err(std::io::Error::last_os_error()); + } + + // Though if there was a race condition and this pre_exec() block is + // run _after_ the parent (i.e., the Codex process) has already + // exited, then the parent is the _init_ process (which will never + // die), so we should just terminate the child process now. + if libc::getppid() == 1 { + libc::raise(libc::SIGTERM); + } + Ok(()) + }); + } + + match stdio_policy { + StdioPolicy::RedirectForShellTool => { + // Do not create a file descriptor for stdin because otherwise some + // commands may hang forever waiting for input. For example, ripgrep has + // a heuristic where it may try to read from stdin as explained here: + // https://github.com/BurntSushi/ripgrep/blob/e2362d4d5185d02fa857bf381e7bd52e66fafc73/crates/core/flags/hiargs.rs#L1101-L1103 + cmd.stdin(Stdio::null()); + + cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); + } + StdioPolicy::Inherit => { + // Inherit stdin, stdout, and stderr from the parent process. + cmd.stdin(Stdio::inherit()) + .stdout(Stdio::inherit()) + .stderr(Stdio::inherit()); + } + } + + cmd.kill_on_drop(true).spawn() +} diff --git a/codex-rs/core/tests/cli_stream.rs b/codex-rs/core/tests/cli_stream.rs index ee0377fc..c8570577 100644 --- a/codex-rs/core/tests/cli_stream.rs +++ b/codex-rs/core/tests/cli_stream.rs @@ -1,7 +1,7 @@ #![expect(clippy::unwrap_used)] use assert_cmd::Command as AssertCommand; -use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use std::time::Duration; use std::time::Instant; use tempfile::TempDir; diff --git a/codex-rs/core/tests/client.rs b/codex-rs/core/tests/client.rs index 1286928e..ab8ad683 100644 --- a/codex-rs/core/tests/client.rs +++ b/codex-rs/core/tests/client.rs @@ -5,11 +5,11 @@ use codex_core::Codex; use codex_core::CodexSpawnOk; use codex_core::ModelProviderInfo; use codex_core::built_in_model_providers; -use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_core::protocol::EventMsg; use codex_core::protocol::InputItem; use codex_core::protocol::Op; use codex_core::protocol::SessionConfiguredEvent; +use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_login::AuthDotJson; use codex_login::AuthMode; use codex_login::CodexAuth; diff --git a/codex-rs/core/tests/stream_no_completed.rs b/codex-rs/core/tests/stream_no_completed.rs index d2fc0355..3e30d937 100644 --- a/codex-rs/core/tests/stream_no_completed.rs +++ b/codex-rs/core/tests/stream_no_completed.rs @@ -6,10 +6,10 @@ use std::time::Duration; use codex_core::Codex; use codex_core::CodexSpawnOk; use codex_core::ModelProviderInfo; -use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_core::protocol::EventMsg; use codex_core::protocol::InputItem; use codex_core::protocol::Op; +use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_login::CodexAuth; use core_test_support::load_default_config_for_test; use core_test_support::load_sse_fixture; diff --git a/codex-rs/mcp-server/tests/codex_tool.rs b/codex-rs/mcp-server/tests/codex_tool.rs index 0f06483f..7d59ff20 100644 --- a/codex-rs/mcp-server/tests/codex_tool.rs +++ b/codex-rs/mcp-server/tests/codex_tool.rs @@ -3,9 +3,9 @@ use std::env; use std::path::Path; use std::path::PathBuf; -use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_core::protocol::FileChange; use codex_core::protocol::ReviewDecision; +use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_mcp_server::CodexToolCallParam; use codex_mcp_server::ExecApprovalElicitRequestParams; use codex_mcp_server::ExecApprovalResponse; diff --git a/codex-rs/mcp-server/tests/interrupt.rs b/codex-rs/mcp-server/tests/interrupt.rs index 313bc7af..b2767f5e 100644 --- a/codex-rs/mcp-server/tests/interrupt.rs +++ b/codex-rs/mcp-server/tests/interrupt.rs @@ -3,7 +3,7 @@ use std::path::Path; -use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use codex_core::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_mcp_server::CodexToolCallParam; use mcp_types::JSONRPCResponse; use mcp_types::RequestId;