bug: sandbox denied error logs (#4874)

Check on STDOUT / STDERR or aggregated output for some logs when sanbox is denied
2025-10-09 17:01:01 +01:00
parent 0026b12615
commit ca6a0358de
4 changed files with 237 additions and 89 deletions
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -1,6 +1,7 @@
 use crate::exec::ExecToolCallOutput;
 use crate::token_data::KnownPlan;
 use crate::token_data::PlanType;
 use crate::truncate::truncate_middle;
 use codex_protocol::ConversationId;
 use codex_protocol::protocol::RateLimitSnapshot;
 use reqwest::StatusCode;
@@ -12,6 +13,9 @@ use tokio::task::JoinError;
 pub type Result<T> = std::result::Result<T, CodexErr>;
 /// Limit UI error messages to a reasonable size while keeping useful context.
 const ERROR_MESSAGE_UI_MAX_BYTES: usize = 2 * 1024; // 4 KiB
 #[derive(Error, Debug)]
 pub enum SandboxErr {
    /// Error from sandbox execution
@@ -304,21 +308,44 @@ impl CodexErr {
 }
 pub fn get_error_message_ui(e: &CodexErr) -> String {
-    match e {
+    let message = match e {
-        CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(),
+        CodexErr::Sandbox(SandboxErr::Denied { output }) => {
            let aggregated = output.aggregated_output.text.trim();
            if !aggregated.is_empty() {
                output.aggregated_output.text.clone()
            } else {
                let stderr = output.stderr.text.trim();
                let stdout = output.stdout.text.trim();
                match (stderr.is_empty(), stdout.is_empty()) {
                    (false, false) => format!("{stderr}\n{stdout}"),
                    (false, true) => output.stderr.text.clone(),
                    (true, false) => output.stdout.text.clone(),
                    (true, true) => format!(
                        "command failed inside sandbox with exit code {}",
                        output.exit_code
                    ),
                }
            }
        }
        // Timeouts are not sandbox errors from a UX perspective; present them plainly
-        CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!(
+        CodexErr::Sandbox(SandboxErr::Timeout { output }) => {
-            "error: command timed out after {} ms",
+            format!(
-            output.duration.as_millis()
+                "error: command timed out after {} ms",
-        ),
+                output.duration.as_millis()
            )
        }
        _ => e.to_string(),
-    }
+    };
    truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::exec::StreamOutput;
    use codex_protocol::protocol::RateLimitWindow;
    use pretty_assertions::assert_eq;
    fn rate_limit_snapshot() -> RateLimitSnapshot {
        RateLimitSnapshot {
@@ -348,6 +375,73 @@ mod tests {
        );
    }
    #[test]
    fn sandbox_denied_uses_aggregated_output_when_stderr_empty() {
        let output = ExecToolCallOutput {
            exit_code: 77,
            stdout: StreamOutput::new(String::new()),
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new("aggregate detail".to_string()),
            duration: Duration::from_millis(10),
            timed_out: false,
        };
        let err = CodexErr::Sandbox(SandboxErr::Denied {
            output: Box::new(output),
        });
        assert_eq!(get_error_message_ui(&err), "aggregate detail");
    }
    #[test]
    fn sandbox_denied_reports_both_streams_when_available() {
        let output = ExecToolCallOutput {
            exit_code: 9,
            stdout: StreamOutput::new("stdout detail".to_string()),
            stderr: StreamOutput::new("stderr detail".to_string()),
            aggregated_output: StreamOutput::new(String::new()),
            duration: Duration::from_millis(10),
            timed_out: false,
        };
        let err = CodexErr::Sandbox(SandboxErr::Denied {
            output: Box::new(output),
        });
        assert_eq!(get_error_message_ui(&err), "stderr detail\nstdout detail");
    }
    #[test]
    fn sandbox_denied_reports_stdout_when_no_stderr() {
        let output = ExecToolCallOutput {
            exit_code: 11,
            stdout: StreamOutput::new("stdout only".to_string()),
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new(String::new()),
            duration: Duration::from_millis(8),
            timed_out: false,
        };
        let err = CodexErr::Sandbox(SandboxErr::Denied {
            output: Box::new(output),
        });
        assert_eq!(get_error_message_ui(&err), "stdout only");
    }
    #[test]
    fn sandbox_denied_reports_exit_code_when_no_output_available() {
        let output = ExecToolCallOutput {
            exit_code: 13,
            stdout: StreamOutput::new(String::new()),
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new(String::new()),
            duration: Duration::from_millis(5),
            timed_out: false,
        };
        let err = CodexErr::Sandbox(SandboxErr::Denied {
            output: Box::new(output),
        });
        assert_eq!(
            get_error_message_ui(&err),
            "command failed inside sandbox with exit code 13"
        );
    }
    #[test]
    fn usage_limit_reached_error_formats_free_plan() {
        let err = UsageLimitReachedError {
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -177,7 +177,7 @@ pub async fn process_exec_tool_call(
                }));
            }
-            if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
+            if is_likely_sandbox_denied(sandbox_type, &exec_output) {
                return Err(CodexErr::Sandbox(SandboxErr::Denied {
                    output: Box::new(exec_output),
                }));
@@ -195,21 +195,57 @@ pub async fn process_exec_tool_call(
 /// We don't have a fully deterministic way to tell if our command failed
 /// because of the sandbox - a command in the user's zshrc file might hit an
 /// error, but the command itself might fail or succeed for other reasons.
-/// For now, we conservatively check for 'command not found' (exit code 127),
+/// For now, we conservatively check for well known command failure exit codes and
-/// and can add additional cases as necessary.
+/// also look for common sandbox denial keywords in the command output.
-fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
+fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool {
-    if sandbox_type == SandboxType::None {
+    if sandbox_type == SandboxType::None || exec_output.exit_code == 0 {
        return false;
    }
    // Quick rejects: well-known non-sandbox shell exit codes
-    // 127: command not found, 2: misuse of shell builtins
+    // 2: misuse of shell builtins
-    if exit_code == 127 {
+    // 126: permission denied
    // 127: command not found
    const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
    if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
        return false;
    }
-    // For all other cases, we assume the sandbox is the cause
+    const SANDBOX_DENIED_KEYWORDS: [&str; 6] = [
-    true
+        "operation not permitted",
        "permission denied",
        "read-only file system",
        "seccomp",
        "sandbox",
        "landlock",
    ];
    if [
        &exec_output.stderr.text,
        &exec_output.stdout.text,
        &exec_output.aggregated_output.text,
    ]
    .into_iter()
    .any(|section| {
        let lower = section.to_lowercase();
        SANDBOX_DENIED_KEYWORDS
            .iter()
            .any(|needle| lower.contains(needle))
    }) {
        return true;
    }
    #[cfg(unix)]
    {
        const SIGSYS_CODE: i32 = libc::SIGSYS;
        if sandbox_type == SandboxType::LinuxSeccomp
            && exec_output.exit_code == EXIT_CODE_SIGNAL_BASE + SIGSYS_CODE
        {
            return true;
        }
    }
    false
 }
 #[derive(Debug)]
@@ -436,3 +472,77 @@ fn synthetic_exit_status(code: i32) -> ExitStatus {
    #[expect(clippy::unwrap_used)]
    std::process::ExitStatus::from_raw(code.try_into().unwrap())
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use std::time::Duration;
    fn make_exec_output(
        exit_code: i32,
        stdout: &str,
        stderr: &str,
        aggregated: &str,
    ) -> ExecToolCallOutput {
        ExecToolCallOutput {
            exit_code,
            stdout: StreamOutput::new(stdout.to_string()),
            stderr: StreamOutput::new(stderr.to_string()),
            aggregated_output: StreamOutput::new(aggregated.to_string()),
            duration: Duration::from_millis(1),
            timed_out: false,
        }
    }
    #[test]
    fn sandbox_detection_requires_keywords() {
        let output = make_exec_output(1, "", "", "");
        assert!(!is_likely_sandbox_denied(
            SandboxType::LinuxSeccomp,
            &output
        ));
    }
    #[test]
    fn sandbox_detection_identifies_keyword_in_stderr() {
        let output = make_exec_output(1, "", "Operation not permitted", "");
        assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
    }
    #[test]
    fn sandbox_detection_respects_quick_reject_exit_codes() {
        let output = make_exec_output(127, "", "command not found", "");
        assert!(!is_likely_sandbox_denied(
            SandboxType::LinuxSeccomp,
            &output
        ));
    }
    #[test]
    fn sandbox_detection_ignores_non_sandbox_mode() {
        let output = make_exec_output(1, "", "Operation not permitted", "");
        assert!(!is_likely_sandbox_denied(SandboxType::None, &output));
    }
    #[test]
    fn sandbox_detection_uses_aggregated_output() {
        let output = make_exec_output(
            101,
            "",
            "",
            "cargo failed: Read-only file system when writing target",
        );
        assert!(is_likely_sandbox_denied(
            SandboxType::MacosSeatbelt,
            &output
        ));
    }
    #[cfg(unix)]
    #[test]
    fn sandbox_detection_flags_sigsys_exit_code() {
        let exit_code = EXIT_CODE_SIGNAL_BASE + libc::SIGSYS;
        let output = make_exec_output(exit_code, "", "", "");
        assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
    }
 }
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -380,6 +380,23 @@ mod tests {
        assert_eq!(message, "failed in sandbox: sandbox stderr");
    }
    #[test]
    fn sandbox_failure_message_falls_back_to_aggregated_output() {
        let output = ExecToolCallOutput {
            exit_code: 101,
            stdout: StreamOutput::new(String::new()),
            stderr: StreamOutput::new(String::new()),
            aggregated_output: StreamOutput::new("aggregate text".to_string()),
            duration: Duration::from_millis(10),
            timed_out: false,
        };
        let err = SandboxErr::Denied {
            output: Box::new(output),
        };
        let message = sandbox_failure_message(err);
        assert_eq!(message, "failed in sandbox: aggregate text");
    }
    #[test]
    fn normalize_function_error_synthesizes_payload() {
        let err = FunctionCallError::RespondToModel("boom".to_string());
--- a/codex-rs/core/tests/suite/tools.rs
+++ b/codex-rs/core/tests/suite/tools.rs
@@ -403,79 +403,6 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
    Ok(())
 }
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn shell_sandbox_denied_truncates_error_output() -> Result<()> {
    skip_if_no_network!(Ok(()));
    let server = start_mock_server().await;
    let mut builder = test_codex();
    let test = builder.build(&server).await?;
    let call_id = "shell-denied";
    let long_line = "this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz";
    let script = format!(
        "for i in $(seq 1 500); do >&2 echo '{long_line}'; done; cat <<'EOF' > denied.txt\ncontent\nEOF",
    );
    let args = json!({
        "command": ["/bin/sh", "-c", script],
        "timeout_ms": 1_000,
    });
    mount_sse_once(
        &server,
        sse(vec![
            ev_response_created("resp-1"),
            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
    )
    .await;
    let second_mock = mount_sse_once(
        &server,
        sse(vec![
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-2"),
        ]),
    )
    .await;
    submit_turn(
        &test,
        "attempt to write in read-only sandbox",
        AskForApproval::Never,
        SandboxPolicy::ReadOnly,
    )
    .await?;
    let denied_item = second_mock.single_request().function_call_output(call_id);
    let output = denied_item
        .get("output")
        .and_then(Value::as_str)
        .expect("denied output string");
    let sandbox_pattern = r#"(?s)^Exit code: -?\d+
 Wall time: [0-9]+(?:\.[0-9]+)? seconds
 Total output lines: \d+
 Output:
 failed in sandbox: .*?(?:Operation not permitted|Permission denied|Read-only file system).*?
 \[\.{3} omitted \d+ of \d+ lines \.{3}\]
 .*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
 \n?$"#;
    let sandbox_regex = Regex::new(sandbox_pattern)?;
    if !sandbox_regex.is_match(output) {
        let fallback_pattern = r#"(?s)^Total output lines: \d+
 failed in sandbox: this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz
 .*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
 .*(?:Operation not permitted|Permission denied|Read-only file system).*$"#;
        assert_regex_match(fallback_pattern, output);
    }
    Ok(())
 }
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
    skip_if_no_network!(Ok(()));