bug: sandbox denied error logs (#4874)
Check on STDOUT / STDERR or aggregated output for some logs when sanbox is denied
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::token_data::KnownPlan;
|
||||
use crate::token_data::PlanType;
|
||||
use crate::truncate::truncate_middle;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::protocol::RateLimitSnapshot;
|
||||
use reqwest::StatusCode;
|
||||
@@ -12,6 +13,9 @@ use tokio::task::JoinError;
|
||||
|
||||
pub type Result<T> = std::result::Result<T, CodexErr>;
|
||||
|
||||
/// Limit UI error messages to a reasonable size while keeping useful context.
|
||||
const ERROR_MESSAGE_UI_MAX_BYTES: usize = 2 * 1024; // 4 KiB
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum SandboxErr {
|
||||
/// Error from sandbox execution
|
||||
@@ -304,21 +308,44 @@ impl CodexErr {
|
||||
}
|
||||
|
||||
pub fn get_error_message_ui(e: &CodexErr) -> String {
|
||||
match e {
|
||||
CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(),
|
||||
let message = match e {
|
||||
CodexErr::Sandbox(SandboxErr::Denied { output }) => {
|
||||
let aggregated = output.aggregated_output.text.trim();
|
||||
if !aggregated.is_empty() {
|
||||
output.aggregated_output.text.clone()
|
||||
} else {
|
||||
let stderr = output.stderr.text.trim();
|
||||
let stdout = output.stdout.text.trim();
|
||||
match (stderr.is_empty(), stdout.is_empty()) {
|
||||
(false, false) => format!("{stderr}\n{stdout}"),
|
||||
(false, true) => output.stderr.text.clone(),
|
||||
(true, false) => output.stdout.text.clone(),
|
||||
(true, true) => format!(
|
||||
"command failed inside sandbox with exit code {}",
|
||||
output.exit_code
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
// Timeouts are not sandbox errors from a UX perspective; present them plainly
|
||||
CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!(
|
||||
"error: command timed out after {} ms",
|
||||
output.duration.as_millis()
|
||||
),
|
||||
CodexErr::Sandbox(SandboxErr::Timeout { output }) => {
|
||||
format!(
|
||||
"error: command timed out after {} ms",
|
||||
output.duration.as_millis()
|
||||
)
|
||||
}
|
||||
_ => e.to_string(),
|
||||
}
|
||||
};
|
||||
|
||||
truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::exec::StreamOutput;
|
||||
use codex_protocol::protocol::RateLimitWindow;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
fn rate_limit_snapshot() -> RateLimitSnapshot {
|
||||
RateLimitSnapshot {
|
||||
@@ -348,6 +375,73 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_denied_uses_aggregated_output_when_stderr_empty() {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code: 77,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new("aggregate detail".to_string()),
|
||||
duration: Duration::from_millis(10),
|
||||
timed_out: false,
|
||||
};
|
||||
let err = CodexErr::Sandbox(SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
});
|
||||
assert_eq!(get_error_message_ui(&err), "aggregate detail");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_denied_reports_both_streams_when_available() {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code: 9,
|
||||
stdout: StreamOutput::new("stdout detail".to_string()),
|
||||
stderr: StreamOutput::new("stderr detail".to_string()),
|
||||
aggregated_output: StreamOutput::new(String::new()),
|
||||
duration: Duration::from_millis(10),
|
||||
timed_out: false,
|
||||
};
|
||||
let err = CodexErr::Sandbox(SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
});
|
||||
assert_eq!(get_error_message_ui(&err), "stderr detail\nstdout detail");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_denied_reports_stdout_when_no_stderr() {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code: 11,
|
||||
stdout: StreamOutput::new("stdout only".to_string()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(String::new()),
|
||||
duration: Duration::from_millis(8),
|
||||
timed_out: false,
|
||||
};
|
||||
let err = CodexErr::Sandbox(SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
});
|
||||
assert_eq!(get_error_message_ui(&err), "stdout only");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_denied_reports_exit_code_when_no_output_available() {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code: 13,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new(String::new()),
|
||||
duration: Duration::from_millis(5),
|
||||
timed_out: false,
|
||||
};
|
||||
let err = CodexErr::Sandbox(SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
});
|
||||
assert_eq!(
|
||||
get_error_message_ui(&err),
|
||||
"command failed inside sandbox with exit code 13"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn usage_limit_reached_error_formats_free_plan() {
|
||||
let err = UsageLimitReachedError {
|
||||
|
||||
@@ -177,7 +177,7 @@ pub async fn process_exec_tool_call(
|
||||
}));
|
||||
}
|
||||
|
||||
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
|
||||
if is_likely_sandbox_denied(sandbox_type, &exec_output) {
|
||||
return Err(CodexErr::Sandbox(SandboxErr::Denied {
|
||||
output: Box::new(exec_output),
|
||||
}));
|
||||
@@ -195,21 +195,57 @@ pub async fn process_exec_tool_call(
|
||||
/// We don't have a fully deterministic way to tell if our command failed
|
||||
/// because of the sandbox - a command in the user's zshrc file might hit an
|
||||
/// error, but the command itself might fail or succeed for other reasons.
|
||||
/// For now, we conservatively check for 'command not found' (exit code 127),
|
||||
/// and can add additional cases as necessary.
|
||||
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
|
||||
if sandbox_type == SandboxType::None {
|
||||
/// For now, we conservatively check for well known command failure exit codes and
|
||||
/// also look for common sandbox denial keywords in the command output.
|
||||
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool {
|
||||
if sandbox_type == SandboxType::None || exec_output.exit_code == 0 {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Quick rejects: well-known non-sandbox shell exit codes
|
||||
// 127: command not found, 2: misuse of shell builtins
|
||||
if exit_code == 127 {
|
||||
// 2: misuse of shell builtins
|
||||
// 126: permission denied
|
||||
// 127: command not found
|
||||
const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
|
||||
if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// For all other cases, we assume the sandbox is the cause
|
||||
true
|
||||
const SANDBOX_DENIED_KEYWORDS: [&str; 6] = [
|
||||
"operation not permitted",
|
||||
"permission denied",
|
||||
"read-only file system",
|
||||
"seccomp",
|
||||
"sandbox",
|
||||
"landlock",
|
||||
];
|
||||
|
||||
if [
|
||||
&exec_output.stderr.text,
|
||||
&exec_output.stdout.text,
|
||||
&exec_output.aggregated_output.text,
|
||||
]
|
||||
.into_iter()
|
||||
.any(|section| {
|
||||
let lower = section.to_lowercase();
|
||||
SANDBOX_DENIED_KEYWORDS
|
||||
.iter()
|
||||
.any(|needle| lower.contains(needle))
|
||||
}) {
|
||||
return true;
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
const SIGSYS_CODE: i32 = libc::SIGSYS;
|
||||
if sandbox_type == SandboxType::LinuxSeccomp
|
||||
&& exec_output.exit_code == EXIT_CODE_SIGNAL_BASE + SIGSYS_CODE
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -436,3 +472,77 @@ fn synthetic_exit_status(code: i32) -> ExitStatus {
|
||||
#[expect(clippy::unwrap_used)]
|
||||
std::process::ExitStatus::from_raw(code.try_into().unwrap())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::time::Duration;
|
||||
|
||||
fn make_exec_output(
|
||||
exit_code: i32,
|
||||
stdout: &str,
|
||||
stderr: &str,
|
||||
aggregated: &str,
|
||||
) -> ExecToolCallOutput {
|
||||
ExecToolCallOutput {
|
||||
exit_code,
|
||||
stdout: StreamOutput::new(stdout.to_string()),
|
||||
stderr: StreamOutput::new(stderr.to_string()),
|
||||
aggregated_output: StreamOutput::new(aggregated.to_string()),
|
||||
duration: Duration::from_millis(1),
|
||||
timed_out: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_detection_requires_keywords() {
|
||||
let output = make_exec_output(1, "", "", "");
|
||||
assert!(!is_likely_sandbox_denied(
|
||||
SandboxType::LinuxSeccomp,
|
||||
&output
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_detection_identifies_keyword_in_stderr() {
|
||||
let output = make_exec_output(1, "", "Operation not permitted", "");
|
||||
assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_detection_respects_quick_reject_exit_codes() {
|
||||
let output = make_exec_output(127, "", "command not found", "");
|
||||
assert!(!is_likely_sandbox_denied(
|
||||
SandboxType::LinuxSeccomp,
|
||||
&output
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_detection_ignores_non_sandbox_mode() {
|
||||
let output = make_exec_output(1, "", "Operation not permitted", "");
|
||||
assert!(!is_likely_sandbox_denied(SandboxType::None, &output));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_detection_uses_aggregated_output() {
|
||||
let output = make_exec_output(
|
||||
101,
|
||||
"",
|
||||
"",
|
||||
"cargo failed: Read-only file system when writing target",
|
||||
);
|
||||
assert!(is_likely_sandbox_denied(
|
||||
SandboxType::MacosSeatbelt,
|
||||
&output
|
||||
));
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
#[test]
|
||||
fn sandbox_detection_flags_sigsys_exit_code() {
|
||||
let exit_code = EXIT_CODE_SIGNAL_BASE + libc::SIGSYS;
|
||||
let output = make_exec_output(exit_code, "", "", "");
|
||||
assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -380,6 +380,23 @@ mod tests {
|
||||
assert_eq!(message, "failed in sandbox: sandbox stderr");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_failure_message_falls_back_to_aggregated_output() {
|
||||
let output = ExecToolCallOutput {
|
||||
exit_code: 101,
|
||||
stdout: StreamOutput::new(String::new()),
|
||||
stderr: StreamOutput::new(String::new()),
|
||||
aggregated_output: StreamOutput::new("aggregate text".to_string()),
|
||||
duration: Duration::from_millis(10),
|
||||
timed_out: false,
|
||||
};
|
||||
let err = SandboxErr::Denied {
|
||||
output: Box::new(output),
|
||||
};
|
||||
let message = sandbox_failure_message(err);
|
||||
assert_eq!(message, "failed in sandbox: aggregate text");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn normalize_function_error_synthesizes_payload() {
|
||||
let err = FunctionCallError::RespondToModel("boom".to_string());
|
||||
|
||||
Reference in New Issue
Block a user