diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 2ed86153..cf5f961a 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1067,6 +1067,7 @@ dependencies = [ "chrono", "codex-app-server-protocol", "codex-apply-patch", + "codex-arg0", "codex-async-utils", "codex-file-search", "codex-git", @@ -1081,6 +1082,7 @@ dependencies = [ "codex-windows-sandbox", "core-foundation 0.9.4", "core_test_support", + "ctor 0.5.0", "dirs", "dunce", "env-flags", diff --git a/codex-rs/arg0/src/lib.rs b/codex-rs/arg0/src/lib.rs index e70ff2df..6b605364 100644 --- a/codex-rs/arg0/src/lib.rs +++ b/codex-rs/arg0/src/lib.rs @@ -11,32 +11,7 @@ const LINUX_SANDBOX_ARG0: &str = "codex-linux-sandbox"; const APPLY_PATCH_ARG0: &str = "apply_patch"; const MISSPELLED_APPLY_PATCH_ARG0: &str = "applypatch"; -/// While we want to deploy the Codex CLI as a single executable for simplicity, -/// we also want to expose some of its functionality as distinct CLIs, so we use -/// the "arg0 trick" to determine which CLI to dispatch. This effectively allows -/// us to simulate deploying multiple executables as a single binary on Mac and -/// Linux (but not Windows). -/// -/// When the current executable is invoked through the hard-link or alias named -/// `codex-linux-sandbox` we *directly* execute -/// [`codex_linux_sandbox::run_main`] (which never returns). Otherwise we: -/// -/// 1. Load `.env` values from `~/.codex/.env` before creating any threads. -/// 2. Construct a Tokio multi-thread runtime. -/// 3. Derive the path to the current executable (so children can re-invoke the -/// sandbox) when running on Linux. -/// 4. Execute the provided async `main_fn` inside that runtime, forwarding any -/// error. Note that `main_fn` receives `codex_linux_sandbox_exe: -/// Option`, as an argument, which is generally needed as part of -/// constructing [`codex_core::config::Config`]. -/// -/// This function should be used to wrap any `main()` function in binary crates -/// in this workspace that depends on these helper CLIs. -pub fn arg0_dispatch_or_else(main_fn: F) -> anyhow::Result<()> -where - F: FnOnce(Option) -> Fut, - Fut: Future>, -{ +pub fn arg0_dispatch() -> Option { // Determine if we were invoked via the special alias. let mut args = std::env::args_os(); let argv0 = args.next().unwrap_or_default(); @@ -76,10 +51,7 @@ where // before creating any threads/the Tokio runtime. load_dotenv(); - // Retain the TempDir so it exists for the lifetime of the invocation of - // this executable. Admittedly, we could invoke `keep()` on it, but it - // would be nice to avoid leaving temporary directories behind, if possible. - let _path_entry = match prepend_path_entry_for_apply_patch() { + match prepend_path_entry_for_codex_aliases() { Ok(path_entry) => Some(path_entry), Err(err) => { // It is possible that Codex will proceed successfully even if @@ -87,7 +59,39 @@ where eprintln!("WARNING: proceeding, even though we could not update PATH: {err}"); None } - }; + } +} + +/// While we want to deploy the Codex CLI as a single executable for simplicity, +/// we also want to expose some of its functionality as distinct CLIs, so we use +/// the "arg0 trick" to determine which CLI to dispatch. This effectively allows +/// us to simulate deploying multiple executables as a single binary on Mac and +/// Linux (but not Windows). +/// +/// When the current executable is invoked through the hard-link or alias named +/// `codex-linux-sandbox` we *directly* execute +/// [`codex_linux_sandbox::run_main`] (which never returns). Otherwise we: +/// +/// 1. Load `.env` values from `~/.codex/.env` before creating any threads. +/// 2. Construct a Tokio multi-thread runtime. +/// 3. Derive the path to the current executable (so children can re-invoke the +/// sandbox) when running on Linux. +/// 4. Execute the provided async `main_fn` inside that runtime, forwarding any +/// error. Note that `main_fn` receives `codex_linux_sandbox_exe: +/// Option`, as an argument, which is generally needed as part of +/// constructing [`codex_core::config::Config`]. +/// +/// This function should be used to wrap any `main()` function in binary crates +/// in this workspace that depends on these helper CLIs. +pub fn arg0_dispatch_or_else(main_fn: F) -> anyhow::Result<()> +where + F: FnOnce(Option) -> Fut, + Fut: Future>, +{ + // Retain the TempDir so it exists for the lifetime of the invocation of + // this executable. Admittedly, we could invoke `keep()` on it, but it + // would be nice to avoid leaving temporary directories behind, if possible. + let _path_entry = arg0_dispatch(); // Regular invocation – create a Tokio runtime and execute the provided // async entry-point. @@ -144,11 +148,16 @@ where /// /// IMPORTANT: This function modifies the PATH environment variable, so it MUST /// be called before multiple threads are spawned. -fn prepend_path_entry_for_apply_patch() -> std::io::Result { +pub fn prepend_path_entry_for_codex_aliases() -> std::io::Result { let temp_dir = TempDir::new()?; let path = temp_dir.path(); - for filename in &[APPLY_PATCH_ARG0, MISSPELLED_APPLY_PATCH_ARG0] { + for filename in &[ + APPLY_PATCH_ARG0, + MISSPELLED_APPLY_PATCH_ARG0, + #[cfg(target_os = "linux")] + LINUX_SANDBOX_ARG0, + ] { let exe = std::env::current_exe()?; #[cfg(unix)] diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml index 17a7a166..6839504b 100644 --- a/codex-rs/core/Cargo.toml +++ b/codex-rs/core/Cargo.toml @@ -32,6 +32,7 @@ codex-utils-pty = { workspace = true } codex-utils-readiness = { workspace = true } codex-utils-string = { workspace = true } codex-utils-tokenizer = { workspace = true } +codex-windows-sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" } dirs = { workspace = true } dunce = { workspace = true } env-flags = { workspace = true } @@ -83,7 +84,6 @@ tree-sitter-bash = { workspace = true } uuid = { workspace = true, features = ["serde", "v4", "v5"] } which = { workspace = true } wildmatch = { workspace = true } -codex_windows_sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" } [target.'cfg(target_os = "linux")'.dependencies] @@ -104,7 +104,9 @@ openssl-sys = { workspace = true, features = ["vendored"] } [dev-dependencies] assert_cmd = { workspace = true } assert_matches = { workspace = true } +codex-arg0 = { workspace = true } core_test_support = { workspace = true } +ctor = { workspace = true } escargot = { workspace = true } image = { workspace = true, features = ["jpeg", "png"] } maplit = { workspace = true } diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index bc10fc2b..55a5f589 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -300,10 +300,16 @@ impl UnifiedExecSessionManager { .command .split_first() .ok_or(UnifiedExecError::MissingCommandLine)?; - let spawned = - codex_utils_pty::spawn_pty_process(program, args, env.cwd.as_path(), &env.env) - .await - .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; + + let spawned = codex_utils_pty::spawn_pty_process( + program, + args, + env.cwd.as_path(), + &env.env, + &env.arg0, + ) + .await + .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; UnifiedExecSession::from_spawned(spawned, env.sandbox).await } diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index a8841674..a19bfa25 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -1,4 +1,17 @@ // Aggregates all former standalone integration tests as modules. +use codex_arg0::arg0_dispatch; +use ctor::ctor; +use tempfile::TempDir; + +// This code runs before any other tests are run. +// It allows the test binary to behave like codex and dispatch to apply_patch and codex-linux-sandbox +// based on the arg0. +// NOTE: this doesn't work on ARM +#[ctor] +pub static CODEX_ALIASES_TEMP_DIR: TempDir = unsafe { + #[allow(clippy::unwrap_used)] + arg0_dispatch().unwrap() +}; #[cfg(not(target_os = "windows"))] mod abort_tasks; diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index cd8b3382..1225b51c 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -66,7 +66,7 @@ fn parse_unified_exec_output(raw: &str) -> Result { let cleaned = raw.trim_matches('\r'); let captures = regex .captures(cleaned) - .ok_or_else(|| anyhow::anyhow!("missing Output section in unified exec output"))?; + .ok_or_else(|| anyhow::anyhow!("missing Output section in unified exec output {raw}"))?; let chunk_id = captures .name("chunk_id") @@ -1368,6 +1368,8 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> { } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] +// Skipped on arm because the ctor logic to handle arg0 doesn't work on ARM +#[cfg(not(target_arch = "arm"))] async fn unified_exec_formats_large_output_summary() -> Result<()> { skip_if_no_network!(Ok(())); skip_if_sandbox!(Ok(())); @@ -1451,3 +1453,75 @@ PY Ok(()) } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn unified_exec_runs_under_sandbox() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = start_mock_server().await; + + let mut builder = test_codex().with_config(|config| { + config.features.enable(Feature::UnifiedExec); + }); + let TestCodex { + codex, + cwd, + session_configured, + .. + } = builder.build(&server).await?; + + let call_id = "uexec"; + let args = serde_json::json!({ + "cmd": "echo 'hello'", + "yield_time_ms": 500, + }); + + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ]; + mount_sse_sequence(&server, responses).await; + + let session_model = session_configured.model.clone(); + + codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "summarize large output".into(), + }], + final_output_json_schema: None, + cwd: cwd.path().to_path_buf(), + approval_policy: AskForApproval::Never, + // Important! + sandbox_policy: SandboxPolicy::ReadOnly, + model: session_model, + effort: None, + summary: ReasoningSummary::Auto, + }) + .await?; + + wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; + + let requests = server.received_requests().await.expect("recorded requests"); + assert!(!requests.is_empty(), "expected at least one POST request"); + + let bodies = requests + .iter() + .map(|req| req.body_json::().expect("request json")) + .collect::>(); + + let outputs = collect_tool_outputs(&bodies)?; + let output = outputs.get(call_id).expect("missing output"); + + assert_regex_match("hello[\r\n]+", &output.output); + + Ok(()) +} diff --git a/codex-rs/utils/pty/src/lib.rs b/codex-rs/utils/pty/src/lib.rs index 740a502b..14cc4307 100644 --- a/codex-rs/utils/pty/src/lib.rs +++ b/codex-rs/utils/pty/src/lib.rs @@ -111,6 +111,7 @@ pub async fn spawn_pty_process( args: &[String], cwd: &Path, env: &HashMap, + arg0: &Option, ) -> Result { if program.is_empty() { anyhow::bail!("missing program for PTY spawn"); @@ -124,7 +125,7 @@ pub async fn spawn_pty_process( pixel_height: 0, })?; - let mut command_builder = CommandBuilder::new(program); + let mut command_builder = CommandBuilder::new(arg0.as_ref().unwrap_or(&program.to_string())); command_builder.cwd(cwd); command_builder.env_clear(); for arg in args {