diff --git a/codex-rs/core/src/tools/events.rs b/codex-rs/core/src/tools/events.rs index c6ef1ce4..9019dac5 100644 --- a/codex-rs/core/src/tools/events.rs +++ b/codex-rs/core/src/tools/events.rs @@ -274,31 +274,32 @@ impl ToolEmitter { ctx: ToolEventCtx<'_>, out: Result, ) -> Result { - let event; - let result = match out { + let (event, result) = match out { Ok(output) => { let content = super::format_exec_output_for_model(&output); let exit_code = output.exit_code; - event = ToolEventStage::Success(output); - if exit_code == 0 { + let event = ToolEventStage::Success(output); + let result = if exit_code == 0 { Ok(content) } else { Err(FunctionCallError::RespondToModel(content)) - } + }; + (event, result) } Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => { let response = super::format_exec_output_for_model(&output); - event = ToolEventStage::Failure(ToolEventFailure::Output(*output)); - Err(FunctionCallError::RespondToModel(response)) + let event = ToolEventStage::Failure(ToolEventFailure::Output(*output)); + let result = Err(FunctionCallError::RespondToModel(response)); + (event, result) } Err(ToolError::Codex(err)) => { let message = format!("execution error: {err:?}"); - let response = message.clone(); - event = ToolEventStage::Failure(ToolEventFailure::Message(message)); - Err(FunctionCallError::RespondToModel(response)) + let event = ToolEventStage::Failure(ToolEventFailure::Message(message.clone())); + let result = Err(FunctionCallError::RespondToModel(message)); + (event, result) } - Err(ToolError::Rejected(msg)) | Err(ToolError::SandboxDenied(msg)) => { + Err(ToolError::Rejected(msg)) => { // Normalize common rejection messages for exec tools so tests and // users see a clear, consistent phrase. let normalized = if msg == "rejected by user" { @@ -306,9 +307,9 @@ impl ToolEmitter { } else { msg }; - let response = &normalized; - event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone())); - Err(FunctionCallError::RespondToModel(response.clone())) + let event = ToolEventStage::Failure(ToolEventFailure::Message(normalized.clone())); + let result = Err(FunctionCallError::RespondToModel(normalized)); + (event, result) } }; self.emit(ctx, event).await; diff --git a/codex-rs/core/src/tools/orchestrator.rs b/codex-rs/core/src/tools/orchestrator.rs index e39c0703..9277af66 100644 --- a/codex-rs/core/src/tools/orchestrator.rs +++ b/codex-rs/core/src/tools/orchestrator.rs @@ -98,15 +98,16 @@ impl ToolOrchestrator { } Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => { if !tool.escalate_on_failure() { - return Err(ToolError::SandboxDenied( - "sandbox denied and no retry".to_string(), - )); + return Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { + output, + }))); } - // Under `Never` or `OnRequest`, do not retry without sandbox; surface a concise message - // derived from the actual output (platform-agnostic). + // Under `Never` or `OnRequest`, do not retry without sandbox; surface a concise + // sandbox denial that preserves the original output. if !tool.wants_no_sandbox_approval(approval_policy) { - let msg = build_never_denied_message_from_output(output.as_ref()); - return Err(ToolError::SandboxDenied(msg)); + return Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { + output, + }))); } // Ask for approval before retrying without sandbox. @@ -167,29 +168,6 @@ impl ToolOrchestrator { } } -fn build_never_denied_message_from_output(output: &ExecToolCallOutput) -> String { - let body = format!( - "{}\n{}\n{}", - output.stderr.text, output.stdout.text, output.aggregated_output.text - ) - .to_lowercase(); - - let detail = if body.contains("permission denied") { - Some("Permission denied") - } else if body.contains("operation not permitted") { - Some("Operation not permitted") - } else if body.contains("read-only file system") { - Some("Read-only file system") - } else { - None - }; - - match detail { - Some(tag) => format!("failed in sandbox: {tag}"), - None => "failed in sandbox".to_string(), - } -} - fn build_denial_reason_from_output(_output: &ExecToolCallOutput) -> String { // Keep approval reason terse and stable for UX/tests, but accept the // output so we can evolve heuristics later without touching call sites. diff --git a/codex-rs/core/src/tools/sandboxing.rs b/codex-rs/core/src/tools/sandboxing.rs index ad483e2f..da1c22b5 100644 --- a/codex-rs/core/src/tools/sandboxing.rs +++ b/codex-rs/core/src/tools/sandboxing.rs @@ -173,7 +173,6 @@ pub(crate) trait ProvidesSandboxRetryData { #[derive(Debug)] pub(crate) enum ToolError { Rejected(String), - SandboxDenied(String), Codex(CodexErr), } diff --git a/codex-rs/core/tests/suite/approvals.rs b/codex-rs/core/tests/suite/approvals.rs index 2d90d98f..cbb5898b 100644 --- a/codex-rs/core/tests/suite/approvals.rs +++ b/codex-rs/core/tests/suite/approvals.rs @@ -268,11 +268,22 @@ impl Expectation { "expected non-zero exit for {path:?}" ); for needle in *message_contains { - assert!( - result.stdout.contains(needle), - "stdout missing {needle:?}: {}", - result.stdout - ); + if needle.contains('|') { + let options: Vec<&str> = needle.split('|').collect(); + let matches_any = + options.iter().any(|option| result.stdout.contains(option)); + assert!( + matches_any, + "stdout missing one of {options:?}: {}", + result.stdout + ); + } else { + assert!( + result.stdout.contains(needle), + "stdout missing {needle:?}: {}", + result.stdout + ); + } } assert!( !path.exists(), @@ -901,7 +912,7 @@ fn scenarios() -> Vec { message_contains: if cfg!(target_os = "linux") { &["Permission denied"] } else { - &["failed in sandbox"] + &["Permission denied|Operation not permitted|Read-only file system"] }, }, }, @@ -1045,7 +1056,7 @@ fn scenarios() -> Vec { message_contains: if cfg!(target_os = "linux") { &["Permission denied"] } else { - &["failed in sandbox"] + &["Permission denied|Operation not permitted|Read-only file system"] }, }, }, diff --git a/codex-rs/core/tests/suite/tools.rs b/codex-rs/core/tests/suite/tools.rs index b9fc5118..a60257b6 100644 --- a/codex-rs/core/tests/suite/tools.rs +++ b/codex-rs/core/tests/suite/tools.rs @@ -1,6 +1,7 @@ #![cfg(not(target_os = "windows"))] #![allow(clippy::unwrap_used, clippy::expect_used)] +use anyhow::Context; use anyhow::Result; use codex_core::features::Feature; use codex_core::model_family::find_family_for_model; @@ -227,6 +228,103 @@ async fn shell_escalated_permissions_rejected_then_ok() -> Result<()> { Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn sandbox_denied_shell_returns_original_output() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut builder = test_codex().with_config(|config| { + config.model = "gpt-5-codex".to_string(); + config.model_family = + find_family_for_model("gpt-5-codex").expect("gpt-5-codex model family"); + }); + let fixture = builder.build(&server).await?; + + let call_id = "sandbox-denied-shell"; + let target_path = fixture.workspace_path("sandbox-denied.txt"); + let sentinel = "sandbox-denied sentinel output"; + let command = vec![ + "/bin/sh".to_string(), + "-c".to_string(), + format!( + "printf {sentinel:?}; printf {content:?} > {path:?}", + sentinel = format!("{sentinel}\n"), + content = "sandbox denied", + path = &target_path + ), + ]; + let args = json!({ + "command": command, + "timeout_ms": 1_000, + }); + + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ]; + let mock = mount_sse_sequence(&server, responses).await; + + fixture + .submit_turn_with_policy( + "run a command that should be denied by the read-only sandbox", + SandboxPolicy::ReadOnly, + ) + .await?; + + let output_text = mock + .function_call_output_text(call_id) + .context("shell output present")?; + let exit_code_line = output_text + .lines() + .next() + .context("exit code line present")?; + let exit_code = exit_code_line + .strip_prefix("Exit code: ") + .context("exit code prefix present")? + .trim() + .parse::() + .context("exit code is integer")?; + let body = output_text; + + let body_lower = body.to_lowercase(); + // Required for multi-OS. + let has_denial = body_lower.contains("permission denied") + || body_lower.contains("operation not permitted") + || body_lower.contains("read-only file system"); + assert!( + has_denial, + "expected sandbox denial details in tool output: {body}" + ); + assert!( + body.contains(sentinel), + "expected sentinel output from command to reach the model: {body}" + ); + let target_path_str = target_path + .to_str() + .context("target path string representation")?; + assert!( + body.contains(target_path_str), + "expected sandbox error to mention denied path: {body}" + ); + assert!( + !body_lower.contains("failed in sandbox"), + "expected original tool output, found fallback message: {body}" + ); + assert_ne!( + exit_code, 0, + "sandbox denial should surface a non-zero exit code" + ); + + Ok(()) +} + async fn collect_tools(use_unified_exec: bool) -> Result> { let server = start_mock_server().await;