fix: run python_multiprocessing_lock_works integration test on Mac and Linux (#2318)

The high-order bit on this PR is that it makes it so `sandbox.rs` tests both Mac and Linux, as we introduce a general `spawn_command_under_sandbox()` function with platform-specific implementations for testing. An important, and interesting, discovery in porting the test to Linux is that (for reasons cited in the code comments), `/dev/shm` has to be added to `writable_roots` on Linux in order for `multiprocessing.Lock` to work there. Granting write access to `/dev/shm` comes with some degree of risk, so we do not make this the default for Codex CLI. Piggybacking on top of #2317, this moves the `python_multiprocessing_lock_works` test yet again, moving `codex-rs/core/tests/sandbox.rs` to `codex-rs/exec/tests/sandbox.rs` because in `codex-rs/exec/tests` we can use `cargo_bin()` like so: ``` let codex_linux_sandbox_exe = assert_cmd::cargo::cargo_bin("codex-exec"); ``` which is necessary so we can use `codex_linux_sandbox_exe` and therefore `spawn_command_under_linux_sandbox` in an integration test. This also moves `spawn_command_under_linux_sandbox()` out of `exec.rs` and into `landlock.rs`, which makes things more consistent with `seatbelt.rs` in `codex-core`. For reference, https://github.com/openai/codex/pull/1808 is the PR that made the change to Seatbelt to get this test to pass on Mac.
2025-08-14 15:47:48 -07:00
parent a8c7f5391c
commit 2ecca79663
6 changed files with 161 additions and 110 deletions
--- a/codex-rs/cli/src/debug_sandbox.rs
+++ b/codex-rs/cli/src/debug_sandbox.rs
@@ -4,8 +4,8 @@ use codex_common::CliConfigOverrides;
 use codex_core::config::Config;
 use codex_core::config::ConfigOverrides;
 use codex_core::config_types::SandboxMode;
-use codex_core::exec::spawn_command_under_linux_sandbox;
 use codex_core::exec_env::create_env;
+use codex_core::landlock::spawn_command_under_linux_sandbox;
 use codex_core::seatbelt::spawn_command_under_seatbelt;
 use codex_core::spawn::StdioPolicy;

--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -3,7 +3,6 @@ use std::os::unix::process::ExitStatusExt;

 use std::collections::HashMap;
 use std::io;
-use std::path::Path;
 use std::path::PathBuf;
 use std::process::ExitStatus;
 use std::time::Duration;
@@ -18,6 +17,7 @@ use tokio::process::Child;
 use crate::error::CodexErr;
 use crate::error::Result;
 use crate::error::SandboxErr;
+use crate::landlock::spawn_command_under_linux_sandbox;
 use crate::protocol::Event;
 use crate::protocol::EventMsg;
 use crate::protocol::ExecCommandOutputDeltaEvent;
@@ -163,65 +163,6 @@ pub async fn process_exec_tool_call(
    }
 }

-/// Spawn a shell tool command under the Linux Landlock+seccomp sandbox helper
-/// (codex-linux-sandbox).
-///
-/// Unlike macOS Seatbelt where we directly embed the policy text, the Linux
-/// helper accepts a list of `--sandbox-permission`/`-s` flags mirroring the
-/// public CLI. We convert the internal [`SandboxPolicy`] representation into
-/// the equivalent CLI options.
-pub async fn spawn_command_under_linux_sandbox<P>(
-    codex_linux_sandbox_exe: P,
-    command: Vec<String>,
-    sandbox_policy: &SandboxPolicy,
-    cwd: PathBuf,
-    stdio_policy: StdioPolicy,
-    env: HashMap<String, String>,
-) -> std::io::Result<Child>
-where
-    P: AsRef<Path>,
-{
-    let args = create_linux_sandbox_command_args(command, sandbox_policy, &cwd);
-    let arg0 = Some("codex-linux-sandbox");
-    spawn_child_async(
-        codex_linux_sandbox_exe.as_ref().to_path_buf(),
-        args,
-        arg0,
-        cwd,
-        sandbox_policy,
-        stdio_policy,
-        env,
-    )
-    .await
-}
-
-/// Converts the sandbox policy into the CLI invocation for `codex-linux-sandbox`.
-fn create_linux_sandbox_command_args(
-    command: Vec<String>,
-    sandbox_policy: &SandboxPolicy,
-    cwd: &Path,
-) -> Vec<String> {
-    #[expect(clippy::expect_used)]
-    let sandbox_policy_cwd = cwd.to_str().expect("cwd must be valid UTF-8").to_string();
-
-    #[expect(clippy::expect_used)]
-    let sandbox_policy_json =
-        serde_json::to_string(sandbox_policy).expect("Failed to serialize SandboxPolicy to JSON");
-
-    let mut linux_cmd: Vec<String> = vec![
-        sandbox_policy_cwd,
-        sandbox_policy_json,
-        // Separator so that command arguments starting with `-` are not parsed as
-        // options of the helper itself.
-        "--".to_string(),
-    ];
-
-    // Append the original tool command.
-    linux_cmd.extend(command);
-
-    linux_cmd
-}
-
 /// We don't have a fully deterministic way to tell if our command failed
 /// because of the sandbox - a command in the user's zshrc file might hit an
 /// error, but the command itself might fail or succeed for other reasons.
--- a/codex-rs/core/src/landlock.rs
+++ b/codex-rs/core/src/landlock.rs
@@ -0,0 +1,66 @@
+use crate::protocol::SandboxPolicy;
+use crate::spawn::StdioPolicy;
+use crate::spawn::spawn_child_async;
+use std::collections::HashMap;
+use std::path::Path;
+use std::path::PathBuf;
+use tokio::process::Child;
+
+/// Spawn a shell tool command under the Linux Landlock+seccomp sandbox helper
+/// (codex-linux-sandbox).
+///
+/// Unlike macOS Seatbelt where we directly embed the policy text, the Linux
+/// helper accepts a list of `--sandbox-permission`/`-s` flags mirroring the
+/// public CLI. We convert the internal [`SandboxPolicy`] representation into
+/// the equivalent CLI options.
+pub async fn spawn_command_under_linux_sandbox<P>(
+    codex_linux_sandbox_exe: P,
+    command: Vec<String>,
+    sandbox_policy: &SandboxPolicy,
+    cwd: PathBuf,
+    stdio_policy: StdioPolicy,
+    env: HashMap<String, String>,
+) -> std::io::Result<Child>
+where
+    P: AsRef<Path>,
+{
+    let args = create_linux_sandbox_command_args(command, sandbox_policy, &cwd);
+    let arg0 = Some("codex-linux-sandbox");
+    spawn_child_async(
+        codex_linux_sandbox_exe.as_ref().to_path_buf(),
+        args,
+        arg0,
+        cwd,
+        sandbox_policy,
+        stdio_policy,
+        env,
+    )
+    .await
+}
+
+/// Converts the sandbox policy into the CLI invocation for `codex-linux-sandbox`.
+fn create_linux_sandbox_command_args(
+    command: Vec<String>,
+    sandbox_policy: &SandboxPolicy,
+    cwd: &Path,
+) -> Vec<String> {
+    #[expect(clippy::expect_used)]
+    let sandbox_policy_cwd = cwd.to_str().expect("cwd must be valid UTF-8").to_string();
+
+    #[expect(clippy::expect_used)]
+    let sandbox_policy_json =
+        serde_json::to_string(sandbox_policy).expect("Failed to serialize SandboxPolicy to JSON");
+
+    let mut linux_cmd: Vec<String> = vec![
+        sandbox_policy_cwd,
+        sandbox_policy_json,
+        // Separator so that command arguments starting with `-` are not parsed as
+        // options of the helper itself.
+        "--".to_string(),
+    ];
+
+    // Append the original tool command.
+    linux_cmd.extend(command);
+
+    linux_cmd
+}
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -24,6 +24,7 @@ pub mod exec_env;
 mod flags;
 pub mod git_info;
 mod is_safe_command;
+pub mod landlock;
 mod mcp_connection_manager;
 mod mcp_tool_call;
 mod message_history;
--- a/codex-rs/core/tests/sandbox.rs
+++ b/codex-rs/core/tests/sandbox.rs
@@ -1,49 +0,0 @@
-// TODO(mbolin): Update this test to run on Linux, as well.
-// (Should rename the test as part of that work.)
-#[cfg(target_os = "macos")]
-#[tokio::test]
-async fn python_multiprocessing_lock_works_under_seatbelt() {
-    #![expect(clippy::expect_used)]
-    use codex_core::protocol::SandboxPolicy;
-    use codex_core::seatbelt::spawn_command_under_seatbelt;
-    use codex_core::spawn::StdioPolicy;
-    use std::collections::HashMap;
-
-    let policy = SandboxPolicy::WorkspaceWrite {
-        writable_roots: vec![],
-        network_access: false,
-        exclude_tmpdir_env_var: false,
-        exclude_slash_tmp: false,
-    };
-
-    let python_code = r#"import multiprocessing
-from multiprocessing import Lock, Process
-
-def f(lock):
-    with lock:
-        print("Lock acquired in child process")
-
-if __name__ == '__main__':
-    lock = Lock()
-    p = Process(target=f, args=(lock,))
-    p.start()
-    p.join()
-"#;
-
-    let mut child = spawn_command_under_seatbelt(
-        vec![
-            "python3".to_string(),
-            "-c".to_string(),
-            python_code.to_string(),
-        ],
-        &policy,
-        std::env::current_dir().expect("should be able to get current dir"),
-        StdioPolicy::RedirectForShellTool,
-        HashMap::new(),
-    )
-    .await
-    .expect("should be able to spawn python under seatbelt");
-
-    let status = child.wait().await.expect("should wait for child process");
-    assert!(status.success(), "python exited with {status:?}");
-}
--- a/codex-rs/exec/tests/sandbox.rs
+++ b/codex-rs/exec/tests/sandbox.rs
@@ -0,0 +1,92 @@
+#![cfg(unix)]
+#![expect(clippy::expect_used)]
+
+use codex_core::protocol::SandboxPolicy;
+use codex_core::spawn::StdioPolicy;
+use std::collections::HashMap;
+use std::path::PathBuf;
+use tokio::process::Child;
+
+#[cfg(target_os = "macos")]
+async fn spawn_command_under_sandbox(
+    command: Vec<String>,
+    sandbox_policy: &SandboxPolicy,
+    cwd: PathBuf,
+    stdio_policy: StdioPolicy,
+    env: HashMap<String, String>,
+) -> std::io::Result<Child> {
+    use codex_core::seatbelt::spawn_command_under_seatbelt;
+    spawn_command_under_seatbelt(command, sandbox_policy, cwd, stdio_policy, env).await
+}
+
+#[cfg(target_os = "linux")]
+async fn spawn_command_under_sandbox(
+    command: Vec<String>,
+    sandbox_policy: &SandboxPolicy,
+    cwd: PathBuf,
+    stdio_policy: StdioPolicy,
+    env: HashMap<String, String>,
+) -> std::io::Result<Child> {
+    use codex_core::landlock::spawn_command_under_linux_sandbox;
+    let codex_linux_sandbox_exe = assert_cmd::cargo::cargo_bin("codex-exec");
+    spawn_command_under_linux_sandbox(
+        codex_linux_sandbox_exe,
+        command,
+        sandbox_policy,
+        cwd,
+        stdio_policy,
+        env,
+    )
+    .await
+}
+
+#[tokio::test]
+async fn python_multiprocessing_lock_works_under_sandbox() {
+    #[cfg(target_os = "macos")]
+    let writable_roots = Vec::<PathBuf>::new();
+
+    // From https://man7.org/linux/man-pages/man7/sem_overview.7.html
+    //
+    // > On Linux, named semaphores are created in a virtual filesystem,
+    // > normally mounted under /dev/shm.
+    #[cfg(target_os = "linux")]
+    let writable_roots = vec![PathBuf::from("/dev/shm")];
+
+    let policy = SandboxPolicy::WorkspaceWrite {
+        writable_roots,
+        network_access: false,
+        exclude_tmpdir_env_var: false,
+        exclude_slash_tmp: false,
+    };
+
+    let python_code = r#"import multiprocessing
+from multiprocessing import Lock, Process
+
+def f(lock):
+    with lock:
+        print("Lock acquired in child process")
+
+if __name__ == '__main__':
+    lock = Lock()
+    p = Process(target=f, args=(lock,))
+    p.start()
+    p.join()
+"#;
+
+    let mut child = spawn_command_under_sandbox(
+        vec![
+            "python3".to_string(),
+            "-c".to_string(),
+            python_code.to_string(),
+        ],
+        &policy,
+        std::env::current_dir().expect("should be able to get current dir"),
+        StdioPolicy::Inherit,
+        HashMap::new(),
+    )
+    .await
+    .expect("should be able to spawn python under sandbox");
+
+    let status = child.wait().await.expect("should wait for child process");
+    assert!(status.success(), "python exited with {status:?}");
+}