bug: sandbox denied error logs (#4874)

Check on STDOUT / STDERR or aggregated output for some logs when sanbox is denied
2025-10-09 17:01:01 +01:00
parent 0026b12615
commit ca6a0358de
4 changed files with 237 additions and 89 deletions
--- a/codex-rs/core/src/error.rs
+++ b/codex-rs/core/src/error.rs
@@ -1,6 +1,7 @@
 use crate::exec::ExecToolCallOutput;
 use crate::token_data::KnownPlan;
 use crate::token_data::PlanType;
+use crate::truncate::truncate_middle;
 use codex_protocol::ConversationId;
 use codex_protocol::protocol::RateLimitSnapshot;
 use reqwest::StatusCode;
@@ -12,6 +13,9 @@ use tokio::task::JoinError;

 pub type Result<T> = std::result::Result<T, CodexErr>;

+/// Limit UI error messages to a reasonable size while keeping useful context.
+const ERROR_MESSAGE_UI_MAX_BYTES: usize = 2 * 1024; // 4 KiB
+
 #[derive(Error, Debug)]
 pub enum SandboxErr {
    /// Error from sandbox execution
@@ -304,21 +308,44 @@ impl CodexErr {
 }

 pub fn get_error_message_ui(e: &CodexErr) -> String {
-    match e {
-        CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(),
+    let message = match e {
+        CodexErr::Sandbox(SandboxErr::Denied { output }) => {
+            let aggregated = output.aggregated_output.text.trim();
+            if !aggregated.is_empty() {
+                output.aggregated_output.text.clone()
+            } else {
+                let stderr = output.stderr.text.trim();
+                let stdout = output.stdout.text.trim();
+                match (stderr.is_empty(), stdout.is_empty()) {
+                    (false, false) => format!("{stderr}\n{stdout}"),
+                    (false, true) => output.stderr.text.clone(),
+                    (true, false) => output.stdout.text.clone(),
+                    (true, true) => format!(
+                        "command failed inside sandbox with exit code {}",
+                        output.exit_code
+                    ),
+                }
+            }
+        }
        // Timeouts are not sandbox errors from a UX perspective; present them plainly
-        CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!(
-            "error: command timed out after {} ms",
-            output.duration.as_millis()
-        ),
+        CodexErr::Sandbox(SandboxErr::Timeout { output }) => {
+            format!(
+                "error: command timed out after {} ms",
+                output.duration.as_millis()
+            )
+        }
        _ => e.to_string(),
-    }
+    };
+
+    truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0
 }

 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::exec::StreamOutput;
    use codex_protocol::protocol::RateLimitWindow;
+    use pretty_assertions::assert_eq;

    fn rate_limit_snapshot() -> RateLimitSnapshot {
        RateLimitSnapshot {
@@ -348,6 +375,73 @@ mod tests {
        );
    }

+    #[test]
+    fn sandbox_denied_uses_aggregated_output_when_stderr_empty() {
+        let output = ExecToolCallOutput {
+            exit_code: 77,
+            stdout: StreamOutput::new(String::new()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new("aggregate detail".to_string()),
+            duration: Duration::from_millis(10),
+            timed_out: false,
+        };
+        let err = CodexErr::Sandbox(SandboxErr::Denied {
+            output: Box::new(output),
+        });
+        assert_eq!(get_error_message_ui(&err), "aggregate detail");
+    }
+
+    #[test]
+    fn sandbox_denied_reports_both_streams_when_available() {
+        let output = ExecToolCallOutput {
+            exit_code: 9,
+            stdout: StreamOutput::new("stdout detail".to_string()),
+            stderr: StreamOutput::new("stderr detail".to_string()),
+            aggregated_output: StreamOutput::new(String::new()),
+            duration: Duration::from_millis(10),
+            timed_out: false,
+        };
+        let err = CodexErr::Sandbox(SandboxErr::Denied {
+            output: Box::new(output),
+        });
+        assert_eq!(get_error_message_ui(&err), "stderr detail\nstdout detail");
+    }
+
+    #[test]
+    fn sandbox_denied_reports_stdout_when_no_stderr() {
+        let output = ExecToolCallOutput {
+            exit_code: 11,
+            stdout: StreamOutput::new("stdout only".to_string()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new(String::new()),
+            duration: Duration::from_millis(8),
+            timed_out: false,
+        };
+        let err = CodexErr::Sandbox(SandboxErr::Denied {
+            output: Box::new(output),
+        });
+        assert_eq!(get_error_message_ui(&err), "stdout only");
+    }
+
+    #[test]
+    fn sandbox_denied_reports_exit_code_when_no_output_available() {
+        let output = ExecToolCallOutput {
+            exit_code: 13,
+            stdout: StreamOutput::new(String::new()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new(String::new()),
+            duration: Duration::from_millis(5),
+            timed_out: false,
+        };
+        let err = CodexErr::Sandbox(SandboxErr::Denied {
+            output: Box::new(output),
+        });
+        assert_eq!(
+            get_error_message_ui(&err),
+            "command failed inside sandbox with exit code 13"
+        );
+    }
+
    #[test]
    fn usage_limit_reached_error_formats_free_plan() {
        let err = UsageLimitReachedError {
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -177,7 +177,7 @@ pub async fn process_exec_tool_call(
                }));
            }

-            if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
+            if is_likely_sandbox_denied(sandbox_type, &exec_output) {
                return Err(CodexErr::Sandbox(SandboxErr::Denied {
                    output: Box::new(exec_output),
                }));
@@ -195,21 +195,57 @@ pub async fn process_exec_tool_call(
 /// We don't have a fully deterministic way to tell if our command failed
 /// because of the sandbox - a command in the user's zshrc file might hit an
 /// error, but the command itself might fail or succeed for other reasons.
-/// For now, we conservatively check for 'command not found' (exit code 127),
-/// and can add additional cases as necessary.
-fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
-    if sandbox_type == SandboxType::None {
+/// For now, we conservatively check for well known command failure exit codes and
+/// also look for common sandbox denial keywords in the command output.
+fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool {
+    if sandbox_type == SandboxType::None || exec_output.exit_code == 0 {
        return false;
    }

    // Quick rejects: well-known non-sandbox shell exit codes
-    // 127: command not found, 2: misuse of shell builtins
-    if exit_code == 127 {
+    // 2: misuse of shell builtins
+    // 126: permission denied
+    // 127: command not found
+    const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
+    if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
        return false;
    }

-    // For all other cases, we assume the sandbox is the cause
-    true
+    const SANDBOX_DENIED_KEYWORDS: [&str; 6] = [
+        "operation not permitted",
+        "permission denied",
+        "read-only file system",
+        "seccomp",
+        "sandbox",
+        "landlock",
+    ];
+
+    if [
+        &exec_output.stderr.text,
+        &exec_output.stdout.text,
+        &exec_output.aggregated_output.text,
+    ]
+    .into_iter()
+    .any(|section| {
+        let lower = section.to_lowercase();
+        SANDBOX_DENIED_KEYWORDS
+            .iter()
+            .any(|needle| lower.contains(needle))
+    }) {
+        return true;
+    }
+
+    #[cfg(unix)]
+    {
+        const SIGSYS_CODE: i32 = libc::SIGSYS;
+        if sandbox_type == SandboxType::LinuxSeccomp
+            && exec_output.exit_code == EXIT_CODE_SIGNAL_BASE + SIGSYS_CODE
+        {
+            return true;
+        }
+    }
+
+    false
 }

 #[derive(Debug)]
@@ -436,3 +472,77 @@ fn synthetic_exit_status(code: i32) -> ExitStatus {
    #[expect(clippy::unwrap_used)]
    std::process::ExitStatus::from_raw(code.try_into().unwrap())
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::Duration;
+
+    fn make_exec_output(
+        exit_code: i32,
+        stdout: &str,
+        stderr: &str,
+        aggregated: &str,
+    ) -> ExecToolCallOutput {
+        ExecToolCallOutput {
+            exit_code,
+            stdout: StreamOutput::new(stdout.to_string()),
+            stderr: StreamOutput::new(stderr.to_string()),
+            aggregated_output: StreamOutput::new(aggregated.to_string()),
+            duration: Duration::from_millis(1),
+            timed_out: false,
+        }
+    }
+
+    #[test]
+    fn sandbox_detection_requires_keywords() {
+        let output = make_exec_output(1, "", "", "");
+        assert!(!is_likely_sandbox_denied(
+            SandboxType::LinuxSeccomp,
+            &output
+        ));
+    }
+
+    #[test]
+    fn sandbox_detection_identifies_keyword_in_stderr() {
+        let output = make_exec_output(1, "", "Operation not permitted", "");
+        assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
+    }
+
+    #[test]
+    fn sandbox_detection_respects_quick_reject_exit_codes() {
+        let output = make_exec_output(127, "", "command not found", "");
+        assert!(!is_likely_sandbox_denied(
+            SandboxType::LinuxSeccomp,
+            &output
+        ));
+    }
+
+    #[test]
+    fn sandbox_detection_ignores_non_sandbox_mode() {
+        let output = make_exec_output(1, "", "Operation not permitted", "");
+        assert!(!is_likely_sandbox_denied(SandboxType::None, &output));
+    }
+
+    #[test]
+    fn sandbox_detection_uses_aggregated_output() {
+        let output = make_exec_output(
+            101,
+            "",
+            "",
+            "cargo failed: Read-only file system when writing target",
+        );
+        assert!(is_likely_sandbox_denied(
+            SandboxType::MacosSeatbelt,
+            &output
+        ));
+    }
+
+    #[cfg(unix)]
+    #[test]
+    fn sandbox_detection_flags_sigsys_exit_code() {
+        let exit_code = EXIT_CODE_SIGNAL_BASE + libc::SIGSYS;
+        let output = make_exec_output(exit_code, "", "", "");
+        assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
+    }
+}
--- a/codex-rs/core/src/executor/runner.rs
+++ b/codex-rs/core/src/executor/runner.rs
@@ -380,6 +380,23 @@ mod tests {
        assert_eq!(message, "failed in sandbox: sandbox stderr");
    }

+    #[test]
+    fn sandbox_failure_message_falls_back_to_aggregated_output() {
+        let output = ExecToolCallOutput {
+            exit_code: 101,
+            stdout: StreamOutput::new(String::new()),
+            stderr: StreamOutput::new(String::new()),
+            aggregated_output: StreamOutput::new("aggregate text".to_string()),
+            duration: Duration::from_millis(10),
+            timed_out: false,
+        };
+        let err = SandboxErr::Denied {
+            output: Box::new(output),
+        };
+        let message = sandbox_failure_message(err);
+        assert_eq!(message, "failed in sandbox: aggregate text");
+    }
+
    #[test]
    fn normalize_function_error_synthesizes_payload() {
        let err = FunctionCallError::RespondToModel("boom".to_string());
--- a/codex-rs/core/tests/suite/tools.rs
+++ b/codex-rs/core/tests/suite/tools.rs
@@ -403,79 +403,6 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
    Ok(())
 }

-#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn shell_sandbox_denied_truncates_error_output() -> Result<()> {
-    skip_if_no_network!(Ok(()));
-
-    let server = start_mock_server().await;
-    let mut builder = test_codex();
-    let test = builder.build(&server).await?;
-
-    let call_id = "shell-denied";
-    let long_line = "this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz";
-    let script = format!(
-        "for i in $(seq 1 500); do >&2 echo '{long_line}'; done; cat <<'EOF' > denied.txt\ncontent\nEOF",
-    );
-    let args = json!({
-        "command": ["/bin/sh", "-c", script],
-        "timeout_ms": 1_000,
-    });
-
-    mount_sse_once(
-        &server,
-        sse(vec![
-            ev_response_created("resp-1"),
-            ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
-            ev_completed("resp-1"),
-        ]),
-    )
-    .await;
-    let second_mock = mount_sse_once(
-        &server,
-        sse(vec![
-            ev_assistant_message("msg-1", "done"),
-            ev_completed("resp-2"),
-        ]),
-    )
-    .await;
-
-    submit_turn(
-        &test,
-        "attempt to write in read-only sandbox",
-        AskForApproval::Never,
-        SandboxPolicy::ReadOnly,
-    )
-    .await?;
-
-    let denied_item = second_mock.single_request().function_call_output(call_id);
-
-    let output = denied_item
-        .get("output")
-        .and_then(Value::as_str)
-        .expect("denied output string");
-
-    let sandbox_pattern = r#"(?s)^Exit code: -?\d+
-Wall time: [0-9]+(?:\.[0-9]+)? seconds
-Total output lines: \d+
-Output:
-
-failed in sandbox: .*?(?:Operation not permitted|Permission denied|Read-only file system).*?
-\[\.{3} omitted \d+ of \d+ lines \.{3}\]
-.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
-\n?$"#;
-    let sandbox_regex = Regex::new(sandbox_pattern)?;
-    if !sandbox_regex.is_match(output) {
-        let fallback_pattern = r#"(?s)^Total output lines: \d+
-
-failed in sandbox: this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz
-.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
-.*(?:Operation not permitted|Permission denied|Read-only file system).*$"#;
-        assert_regex_match(fallback_pattern, output);
-    }
-
-    Ok(())
-}
-
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
    skip_if_no_network!(Ok(()));