Use codex-linux-sandbox in unified exec (#6480)
Unified exec isn't working on Linux because we don't provide the correct arg0. The library we use for pty management doesn't allow setting arg0 separately from executable. Use the same aliasing strategy we use for `apply_patch` for `codex-linux-sandbox`. Use `#[ctor]` hack to dispatch codex-linux-sandbox calls. Addresses https://github.com/openai/codex/issues/6450
This commit is contained in:
@@ -32,6 +32,7 @@ codex-utils-pty = { workspace = true }
|
||||
codex-utils-readiness = { workspace = true }
|
||||
codex-utils-string = { workspace = true }
|
||||
codex-utils-tokenizer = { workspace = true }
|
||||
codex-windows-sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" }
|
||||
dirs = { workspace = true }
|
||||
dunce = { workspace = true }
|
||||
env-flags = { workspace = true }
|
||||
@@ -83,7 +84,6 @@ tree-sitter-bash = { workspace = true }
|
||||
uuid = { workspace = true, features = ["serde", "v4", "v5"] }
|
||||
which = { workspace = true }
|
||||
wildmatch = { workspace = true }
|
||||
codex_windows_sandbox = { package = "codex-windows-sandbox", path = "../windows-sandbox-rs" }
|
||||
|
||||
|
||||
[target.'cfg(target_os = "linux")'.dependencies]
|
||||
@@ -104,7 +104,9 @@ openssl-sys = { workspace = true, features = ["vendored"] }
|
||||
[dev-dependencies]
|
||||
assert_cmd = { workspace = true }
|
||||
assert_matches = { workspace = true }
|
||||
codex-arg0 = { workspace = true }
|
||||
core_test_support = { workspace = true }
|
||||
ctor = { workspace = true }
|
||||
escargot = { workspace = true }
|
||||
image = { workspace = true, features = ["jpeg", "png"] }
|
||||
maplit = { workspace = true }
|
||||
|
||||
@@ -300,10 +300,16 @@ impl UnifiedExecSessionManager {
|
||||
.command
|
||||
.split_first()
|
||||
.ok_or(UnifiedExecError::MissingCommandLine)?;
|
||||
let spawned =
|
||||
codex_utils_pty::spawn_pty_process(program, args, env.cwd.as_path(), &env.env)
|
||||
.await
|
||||
.map_err(|err| UnifiedExecError::create_session(err.to_string()))?;
|
||||
|
||||
let spawned = codex_utils_pty::spawn_pty_process(
|
||||
program,
|
||||
args,
|
||||
env.cwd.as_path(),
|
||||
&env.env,
|
||||
&env.arg0,
|
||||
)
|
||||
.await
|
||||
.map_err(|err| UnifiedExecError::create_session(err.to_string()))?;
|
||||
UnifiedExecSession::from_spawned(spawned, env.sandbox).await
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,17 @@
|
||||
// Aggregates all former standalone integration tests as modules.
|
||||
use codex_arg0::arg0_dispatch;
|
||||
use ctor::ctor;
|
||||
use tempfile::TempDir;
|
||||
|
||||
// This code runs before any other tests are run.
|
||||
// It allows the test binary to behave like codex and dispatch to apply_patch and codex-linux-sandbox
|
||||
// based on the arg0.
|
||||
// NOTE: this doesn't work on ARM
|
||||
#[ctor]
|
||||
pub static CODEX_ALIASES_TEMP_DIR: TempDir = unsafe {
|
||||
#[allow(clippy::unwrap_used)]
|
||||
arg0_dispatch().unwrap()
|
||||
};
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
mod abort_tasks;
|
||||
|
||||
@@ -66,7 +66,7 @@ fn parse_unified_exec_output(raw: &str) -> Result<ParsedUnifiedExecOutput> {
|
||||
let cleaned = raw.trim_matches('\r');
|
||||
let captures = regex
|
||||
.captures(cleaned)
|
||||
.ok_or_else(|| anyhow::anyhow!("missing Output section in unified exec output"))?;
|
||||
.ok_or_else(|| anyhow::anyhow!("missing Output section in unified exec output {raw}"))?;
|
||||
|
||||
let chunk_id = captures
|
||||
.name("chunk_id")
|
||||
@@ -1368,6 +1368,8 @@ async fn unified_exec_timeout_and_followup_poll() -> Result<()> {
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
// Skipped on arm because the ctor logic to handle arg0 doesn't work on ARM
|
||||
#[cfg(not(target_arch = "arm"))]
|
||||
async fn unified_exec_formats_large_output_summary() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
@@ -1451,3 +1453,75 @@ PY
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_runs_under_sandbox() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
});
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
session_configured,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let call_id = "uexec";
|
||||
let args = serde_json::json!({
|
||||
"cmd": "echo 'hello'",
|
||||
"yield_time_ms": 500,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
let session_model = session_configured.model.clone();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "summarize large output".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
// Important!
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
let requests = server.received_requests().await.expect("recorded requests");
|
||||
assert!(!requests.is_empty(), "expected at least one POST request");
|
||||
|
||||
let bodies = requests
|
||||
.iter()
|
||||
.map(|req| req.body_json::<Value>().expect("request json"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let outputs = collect_tool_outputs(&bodies)?;
|
||||
let output = outputs.get(call_id).expect("missing output");
|
||||
|
||||
assert_regex_match("hello[\r\n]+", &output.output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user