Use codex-linux-sandbox in unified exec (#6480)
Unified exec isn't working on Linux because we don't provide the correct arg0. The library we use for pty management doesn't allow setting arg0 separately from executable. Use the same aliasing strategy we use for `apply_patch` for `codex-linux-sandbox`. Use `#[ctor]` hack to dispatch codex-linux-sandbox calls. Addresses https://github.com/openai/codex/issues/6450
This commit is contained in:
2
codex-rs/Cargo.lock
generated
2
codex-rs/Cargo.lock
generated
@@ -1067,6 +1067,7 @@ dependencies = [
|
|||||||
"chrono",
|
"chrono",
|
||||||
"codex-app-server-protocol",
|
"codex-app-server-protocol",
|
||||||
"codex-apply-patch",
|
"codex-apply-patch",
|
||||||
|
"codex-arg0",
|
||||||
"codex-async-utils",
|
"codex-async-utils",
|
||||||
"codex-file-search",
|
"codex-file-search",
|
||||||
"codex-git",
|
"codex-git",
|
||||||
@@ -1081,6 +1082,7 @@ dependencies = [
|
|||||||
"codex-windows-sandbox",
|
"codex-windows-sandbox",
|
||||||
"core-foundation 0.9.4",
|
"core-foundation 0.9.4",
|
||||||
"core_test_support",
|
"core_test_support",
|
||||||
|
"ctor 0.5.0",
|
||||||
"dirs",
|
"dirs",
|
||||||
"dunce",
|
"dunce",
|
||||||
"env-flags",
|
"env-flags",
|
||||||
|
|||||||
@@ -11,32 +11,7 @@ const LINUX_SANDBOX_ARG0: &str = "codex-linux-sandbox";
|
|||||||
const APPLY_PATCH_ARG0: &str = "apply_patch";
|
const APPLY_PATCH_ARG0: &str = "apply_patch";
|
||||||
const MISSPELLED_APPLY_PATCH_ARG0: &str = "applypatch";
|
const MISSPELLED_APPLY_PATCH_ARG0: &str = "applypatch";
|
||||||
|
|
||||||
/// While we want to deploy the Codex CLI as a single executable for simplicity,
|
pub fn arg0_dispatch() -> Option<TempDir> {
|
||||||
/// we also want to expose some of its functionality as distinct CLIs, so we use
|
|
||||||
/// the "arg0 trick" to determine which CLI to dispatch. This effectively allows
|
|
||||||
/// us to simulate deploying multiple executables as a single binary on Mac and
|
|
||||||
/// Linux (but not Windows).
|
|
||||||
///
|
|
||||||
/// When the current executable is invoked through the hard-link or alias named
|
|
||||||
/// `codex-linux-sandbox` we *directly* execute
|
|
||||||
/// [`codex_linux_sandbox::run_main`] (which never returns). Otherwise we:
|
|
||||||
///
|
|
||||||
/// 1. Load `.env` values from `~/.codex/.env` before creating any threads.
|
|
||||||
/// 2. Construct a Tokio multi-thread runtime.
|
|
||||||
/// 3. Derive the path to the current executable (so children can re-invoke the
|
|
||||||
/// sandbox) when running on Linux.
|
|
||||||
/// 4. Execute the provided async `main_fn` inside that runtime, forwarding any
|
|
||||||
/// error. Note that `main_fn` receives `codex_linux_sandbox_exe:
|
|
||||||
/// Option<PathBuf>`, as an argument, which is generally needed as part of
|
|
||||||
/// constructing [`codex_core::config::Config`].
|
|
||||||
///
|
|
||||||
/// This function should be used to wrap any `main()` function in binary crates
|
|
||||||
/// in this workspace that depends on these helper CLIs.
|
|
||||||
pub fn arg0_dispatch_or_else<F, Fut>(main_fn: F) -> anyhow::Result<()>
|
|
||||||
where
|
|
||||||
F: FnOnce(Option<PathBuf>) -> Fut,
|
|
||||||
Fut: Future<Output = anyhow::Result<()>>,
|
|
||||||
{
|
|
||||||
// Determine if we were invoked via the special alias.
|
// Determine if we were invoked via the special alias.
|
||||||
let mut args = std::env::args_os();
|
let mut args = std::env::args_os();
|
||||||
let argv0 = args.next().unwrap_or_default();
|
let argv0 = args.next().unwrap_or_default();
|
||||||
@@ -76,10 +51,7 @@ where
|
|||||||
// before creating any threads/the Tokio runtime.
|
// before creating any threads/the Tokio runtime.
|
||||||
load_dotenv();
|
load_dotenv();
|
||||||
|
|
||||||
// Retain the TempDir so it exists for the lifetime of the invocation of
|
match prepend_path_entry_for_codex_aliases() {
|
||||||
// this executable. Admittedly, we could invoke `keep()` on it, but it
|
|
||||||
// would be nice to avoid leaving temporary directories behind, if possible.
|
|
||||||
let _path_entry = match prepend_path_entry_for_apply_patch() {
|
|
||||||
Ok(path_entry) => Some(path_entry),
|
Ok(path_entry) => Some(path_entry),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
// It is possible that Codex will proceed successfully even if
|
// It is possible that Codex will proceed successfully even if
|
||||||
@@ -87,7 +59,39 @@ where
|
|||||||
eprintln!("WARNING: proceeding, even though we could not update PATH: {err}");
|
eprintln!("WARNING: proceeding, even though we could not update PATH: {err}");
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// While we want to deploy the Codex CLI as a single executable for simplicity,
|
||||||
|
/// we also want to expose some of its functionality as distinct CLIs, so we use
|
||||||
|
/// the "arg0 trick" to determine which CLI to dispatch. This effectively allows
|
||||||
|
/// us to simulate deploying multiple executables as a single binary on Mac and
|
||||||
|
/// Linux (but not Windows).
|
||||||
|
///
|
||||||
|
/// When the current executable is invoked through the hard-link or alias named
|
||||||
|
/// `codex-linux-sandbox` we *directly* execute
|
||||||
|
/// [`codex_linux_sandbox::run_main`] (which never returns). Otherwise we:
|
||||||
|
///
|
||||||
|
/// 1. Load `.env` values from `~/.codex/.env` before creating any threads.
|
||||||
|
/// 2. Construct a Tokio multi-thread runtime.
|
||||||
|
/// 3. Derive the path to the current executable (so children can re-invoke the
|
||||||
|
/// sandbox) when running on Linux.
|
||||||
|
/// 4. Execute the provided async `main_fn` inside that runtime, forwarding any
|
||||||
|
/// error. Note that `main_fn` receives `codex_linux_sandbox_exe:
|
||||||
|
/// Option<PathBuf>`, as an argument, which is generally needed as part of
|
||||||
|
/// constructing [`codex_core::config::Config`].
|
||||||
|
///
|
||||||
|
/// This function should be used to wrap any `main()` function in binary crates
|
||||||
|
/// in this workspace that depends on these helper CLIs.
|
||||||
|
pub fn arg0_dispatch_or_else<F, Fut>(main_fn: F) -> anyhow::Result<()>
|
||||||
|
where
|
||||||
|
F: FnOnce(Option<PathBuf>) -> Fut,
|
||||||
|
Fut: Future<Output = anyhow::Result<()>>,
|
||||||
|
{
|
||||||
|
// Retain the TempDir so it exists for the lifetime of the invocation of
|
||||||
|
// this executable. Admittedly, we could invoke `keep()` on it, but it
|
||||||
|
// would be nice to avoid leaving temporary directories behind, if possible.
|
||||||
|
let _path_entry = arg0_dispatch();
|
||||||
|
|
||||||
// Regular invocation – create a Tokio runtime and execute the provided
|
// Regular invocation – create a Tokio runtime and execute the provided
|
||||||
// async entry-point.
|
// async entry-point.
|
||||||
@@ -144,11 +148,16 @@ where
|
|||||||
///
|
///
|
||||||
/// IMPORTANT: This function modifies the PATH environment variable, so it MUST
|
/// IMPORTANT: This function modifies the PATH environment variable, so it MUST
|
||||||
/// be called before multiple threads are spawned.
|
/// be called before multiple threads are spawned.
|
||||||
fn prepend_path_entry_for_apply_patch() -> std::io::Result<TempDir> {
|
pub fn prepend_path_entry_for_codex_aliases() -> std::io::Result<TempDir> {
|
||||||
let temp_dir = TempDir::new()?;
|
let temp_dir = TempDir::new()?;
|
||||||
let path = temp_dir.path();
|
let path = temp_dir.path();
|
||||||
|
|
||||||
for filename in &[APPLY_PATCH_ARG0, MISSPELLED_APPLY_PATCH_ARG0] {
|
for filename in &[
|
||||||
|
APPLY_PATCH_ARG0,
|
||||||
|
MISSPELLED_APPLY_PATCH_ARG0,
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
LINUX_SANDBOX_ARG0,
|
||||||
|
] {
|
||||||
let exe = std::env::current_exe()?;
|
let exe = std::env::current_exe()?;
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ codex-utils-pty = { workspace = true }
|
|||||||
codex-utils-readiness = { workspace = true }
|
codex-utils-readiness = { workspace = true }
|
||||||
codex-utils-string = { workspace = true }
|
codex-utils-string = { workspace = true }
|
||||||
codex-utils-tokenizer = { workspace = true }
|
codex-utils-tokenizer = { workspace = true }
|
||||||
|
codex-windows-sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" }
|
||||||
dirs = { workspace = true }
|
dirs = { workspace = true }
|
||||||
dunce = { workspace = true }
|
dunce = { workspace = true }
|
||||||
env-flags = { workspace = true }
|
env-flags = { workspace = true }
|
||||||
@@ -83,7 +84,6 @@ tree-sitter-bash = { workspace = true }
|
|||||||
uuid = { workspace = true, features = ["serde", "v4", "v5"] }
|
uuid = { workspace = true, features = ["serde", "v4", "v5"] }
|
||||||
which = { workspace = true }
|
which = { workspace = true }
|
||||||
wildmatch = { workspace = true }
|
wildmatch = { workspace = true }
|
||||||
codex_windows_sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" }
|
|
||||||
|
|
||||||
|
|
||||||
[target.'cfg(target_os = "linux")'.dependencies]
|
[target.'cfg(target_os = "linux")'.dependencies]
|
||||||
@@ -104,7 +104,9 @@ openssl-sys = { workspace = true, features = ["vendored"] }
|
|||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
assert_cmd = { workspace = true }
|
assert_cmd = { workspace = true }
|
||||||
assert_matches = { workspace = true }
|
assert_matches = { workspace = true }
|
||||||
|
codex-arg0 = { workspace = true }
|
||||||
core_test_support = { workspace = true }
|
core_test_support = { workspace = true }
|
||||||
|
ctor = { workspace = true }
|
||||||
escargot = { workspace = true }
|
escargot = { workspace = true }
|
||||||
image = { workspace = true, features = ["jpeg", "png"] }
|
image = { workspace = true, features = ["jpeg", "png"] }
|
||||||
maplit = { workspace = true }
|
maplit = { workspace = true }
|
||||||
|
|||||||
@@ -300,8 +300,14 @@ impl UnifiedExecSessionManager {
|
|||||||
.command
|
.command
|
||||||
.split_first()
|
.split_first()
|
||||||
.ok_or(UnifiedExecError::MissingCommandLine)?;
|
.ok_or(UnifiedExecError::MissingCommandLine)?;
|
||||||
let spawned =
|
|
||||||
codex_utils_pty::spawn_pty_process(program, args, env.cwd.as_path(), &env.env)
|
let spawned = codex_utils_pty::spawn_pty_process(
|
||||||
|
program,
|
||||||
|
args,
|
||||||
|
env.cwd.as_path(),
|
||||||
|
&env.env,
|
||||||
|
&env.arg0,
|
||||||
|
)
|
||||||
.await
|
.await
|
||||||
.map_err(|err| UnifiedExecError::create_session(err.to_string()))?;
|
.map_err(|err| UnifiedExecError::create_session(err.to_string()))?;
|
||||||
UnifiedExecSession::from_spawned(spawned, env.sandbox).await
|
UnifiedExecSession::from_spawned(spawned, env.sandbox).await
|
||||||
|
|||||||
@@ -1,4 +1,17 @@
|
|||||||
// Aggregates all former standalone integration tests as modules.
|
// Aggregates all former standalone integration tests as modules.
|
||||||
|
use codex_arg0::arg0_dispatch;
|
||||||
|
use ctor::ctor;
|
||||||
|
use tempfile::TempDir;
|
||||||
|
|
||||||
|
// This code runs before any other tests are run.
|
||||||
|
// It allows the test binary to behave like codex and dispatch to apply_patch and codex-linux-sandbox
|
||||||
|
// based on the arg0.
|
||||||
|
// NOTE: this doesn't work on ARM
|
||||||
|
#[ctor]
|
||||||
|
pub static CODEX_ALIASES_TEMP_DIR: TempDir = unsafe {
|
||||||
|
#[allow(clippy::unwrap_used)]
|
||||||
|
arg0_dispatch().unwrap()
|
||||||
|
};
|
||||||
|
|
||||||
#[cfg(not(target_os = "windows"))]
|
#[cfg(not(target_os = "windows"))]
|
||||||
mod abort_tasks;
|
mod abort_tasks;
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ fn parse_unified_exec_output(raw: &str) -> Result<ParsedUnifiedExecOutput> {
|
|||||||
let cleaned = raw.trim_matches('\r');
|
let cleaned = raw.trim_matches('\r');
|
||||||
let captures = regex
|
let captures = regex
|
||||||
.captures(cleaned)
|
.captures(cleaned)
|
||||||
.ok_or_else(|| anyhow::anyhow!("missing Output section in unified exec output"))?;
|
.ok_or_else(|| anyhow::anyhow!("missing Output section in unified exec output {raw}"))?;
|
||||||
|
|
||||||
let chunk_id = captures
|
let chunk_id = captures
|
||||||
.name("chunk_id")
|
.name("chunk_id")
|
||||||
@@ -1368,6 +1368,8 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
// Skipped on arm because the ctor logic to handle arg0 doesn't work on ARM
|
||||||
|
#[cfg(not(target_arch = "arm"))]
|
||||||
async fn unified_exec_formats_large_output_summary() -> Result<()> {
|
async fn unified_exec_formats_large_output_summary() -> Result<()> {
|
||||||
skip_if_no_network!(Ok(()));
|
skip_if_no_network!(Ok(()));
|
||||||
skip_if_sandbox!(Ok(()));
|
skip_if_sandbox!(Ok(()));
|
||||||
@@ -1451,3 +1453,75 @@ PY
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
|
async fn unified_exec_runs_under_sandbox() -> Result<()> {
|
||||||
|
skip_if_no_network!(Ok(()));
|
||||||
|
skip_if_sandbox!(Ok(()));
|
||||||
|
|
||||||
|
let server = start_mock_server().await;
|
||||||
|
|
||||||
|
let mut builder = test_codex().with_config(|config| {
|
||||||
|
config.features.enable(Feature::UnifiedExec);
|
||||||
|
});
|
||||||
|
let TestCodex {
|
||||||
|
codex,
|
||||||
|
cwd,
|
||||||
|
session_configured,
|
||||||
|
..
|
||||||
|
} = builder.build(&server).await?;
|
||||||
|
|
||||||
|
let call_id = "uexec";
|
||||||
|
let args = serde_json::json!({
|
||||||
|
"cmd": "echo 'hello'",
|
||||||
|
"yield_time_ms": 500,
|
||||||
|
});
|
||||||
|
|
||||||
|
let responses = vec![
|
||||||
|
sse(vec![
|
||||||
|
ev_response_created("resp-1"),
|
||||||
|
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
||||||
|
ev_completed("resp-1"),
|
||||||
|
]),
|
||||||
|
sse(vec![
|
||||||
|
ev_assistant_message("msg-1", "done"),
|
||||||
|
ev_completed("resp-2"),
|
||||||
|
]),
|
||||||
|
];
|
||||||
|
mount_sse_sequence(&server, responses).await;
|
||||||
|
|
||||||
|
let session_model = session_configured.model.clone();
|
||||||
|
|
||||||
|
codex
|
||||||
|
.submit(Op::UserTurn {
|
||||||
|
items: vec![UserInput::Text {
|
||||||
|
text: "summarize large output".into(),
|
||||||
|
}],
|
||||||
|
final_output_json_schema: None,
|
||||||
|
cwd: cwd.path().to_path_buf(),
|
||||||
|
approval_policy: AskForApproval::Never,
|
||||||
|
// Important!
|
||||||
|
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||||
|
model: session_model,
|
||||||
|
effort: None,
|
||||||
|
summary: ReasoningSummary::Auto,
|
||||||
|
})
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||||
|
|
||||||
|
let requests = server.received_requests().await.expect("recorded requests");
|
||||||
|
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||||
|
|
||||||
|
let bodies = requests
|
||||||
|
.iter()
|
||||||
|
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let outputs = collect_tool_outputs(&bodies)?;
|
||||||
|
let output = outputs.get(call_id).expect("missing output");
|
||||||
|
|
||||||
|
assert_regex_match("hello[\r\n]+", &output.output);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|||||||
@@ -111,6 +111,7 @@ pub async fn spawn_pty_process(
|
|||||||
args: &[String],
|
args: &[String],
|
||||||
cwd: &Path,
|
cwd: &Path,
|
||||||
env: &HashMap<String, String>,
|
env: &HashMap<String, String>,
|
||||||
|
arg0: &Option<String>,
|
||||||
) -> Result<SpawnedPty> {
|
) -> Result<SpawnedPty> {
|
||||||
if program.is_empty() {
|
if program.is_empty() {
|
||||||
anyhow::bail!("missing program for PTY spawn");
|
anyhow::bail!("missing program for PTY spawn");
|
||||||
@@ -124,7 +125,7 @@ pub async fn spawn_pty_process(
|
|||||||
pixel_height: 0,
|
pixel_height: 0,
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let mut command_builder = CommandBuilder::new(program);
|
let mut command_builder = CommandBuilder::new(arg0.as_ref().unwrap_or(&program.to_string()));
|
||||||
command_builder.cwd(cwd);
|
command_builder.cwd(cwd);
|
||||||
command_builder.env_clear();
|
command_builder.env_clear();
|
||||||
for arg in args {
|
for arg in args {
|
||||||
|
|||||||
Reference in New Issue
Block a user