bug: sandbox denied error logs (#4874)
Check on STDOUT / STDERR or aggregated output for some logs when sanbox is denied
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
use crate::exec::ExecToolCallOutput;
|
use crate::exec::ExecToolCallOutput;
|
||||||
use crate::token_data::KnownPlan;
|
use crate::token_data::KnownPlan;
|
||||||
use crate::token_data::PlanType;
|
use crate::token_data::PlanType;
|
||||||
|
use crate::truncate::truncate_middle;
|
||||||
use codex_protocol::ConversationId;
|
use codex_protocol::ConversationId;
|
||||||
use codex_protocol::protocol::RateLimitSnapshot;
|
use codex_protocol::protocol::RateLimitSnapshot;
|
||||||
use reqwest::StatusCode;
|
use reqwest::StatusCode;
|
||||||
@@ -12,6 +13,9 @@ use tokio::task::JoinError;
|
|||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, CodexErr>;
|
pub type Result<T> = std::result::Result<T, CodexErr>;
|
||||||
|
|
||||||
|
/// Limit UI error messages to a reasonable size while keeping useful context.
|
||||||
|
const ERROR_MESSAGE_UI_MAX_BYTES: usize = 2 * 1024; // 4 KiB
|
||||||
|
|
||||||
#[derive(Error, Debug)]
|
#[derive(Error, Debug)]
|
||||||
pub enum SandboxErr {
|
pub enum SandboxErr {
|
||||||
/// Error from sandbox execution
|
/// Error from sandbox execution
|
||||||
@@ -304,21 +308,44 @@ impl CodexErr {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_error_message_ui(e: &CodexErr) -> String {
|
pub fn get_error_message_ui(e: &CodexErr) -> String {
|
||||||
match e {
|
let message = match e {
|
||||||
CodexErr::Sandbox(SandboxErr::Denied { output }) => output.stderr.text.clone(),
|
CodexErr::Sandbox(SandboxErr::Denied { output }) => {
|
||||||
|
let aggregated = output.aggregated_output.text.trim();
|
||||||
|
if !aggregated.is_empty() {
|
||||||
|
output.aggregated_output.text.clone()
|
||||||
|
} else {
|
||||||
|
let stderr = output.stderr.text.trim();
|
||||||
|
let stdout = output.stdout.text.trim();
|
||||||
|
match (stderr.is_empty(), stdout.is_empty()) {
|
||||||
|
(false, false) => format!("{stderr}\n{stdout}"),
|
||||||
|
(false, true) => output.stderr.text.clone(),
|
||||||
|
(true, false) => output.stdout.text.clone(),
|
||||||
|
(true, true) => format!(
|
||||||
|
"command failed inside sandbox with exit code {}",
|
||||||
|
output.exit_code
|
||||||
|
),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
// Timeouts are not sandbox errors from a UX perspective; present them plainly
|
// Timeouts are not sandbox errors from a UX perspective; present them plainly
|
||||||
CodexErr::Sandbox(SandboxErr::Timeout { output }) => format!(
|
CodexErr::Sandbox(SandboxErr::Timeout { output }) => {
|
||||||
"error: command timed out after {} ms",
|
format!(
|
||||||
output.duration.as_millis()
|
"error: command timed out after {} ms",
|
||||||
),
|
output.duration.as_millis()
|
||||||
|
)
|
||||||
|
}
|
||||||
_ => e.to_string(),
|
_ => e.to_string(),
|
||||||
}
|
};
|
||||||
|
|
||||||
|
truncate_middle(&message, ERROR_MESSAGE_UI_MAX_BYTES).0
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::exec::StreamOutput;
|
||||||
use codex_protocol::protocol::RateLimitWindow;
|
use codex_protocol::protocol::RateLimitWindow;
|
||||||
|
use pretty_assertions::assert_eq;
|
||||||
|
|
||||||
fn rate_limit_snapshot() -> RateLimitSnapshot {
|
fn rate_limit_snapshot() -> RateLimitSnapshot {
|
||||||
RateLimitSnapshot {
|
RateLimitSnapshot {
|
||||||
@@ -348,6 +375,73 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sandbox_denied_uses_aggregated_output_when_stderr_empty() {
|
||||||
|
let output = ExecToolCallOutput {
|
||||||
|
exit_code: 77,
|
||||||
|
stdout: StreamOutput::new(String::new()),
|
||||||
|
stderr: StreamOutput::new(String::new()),
|
||||||
|
aggregated_output: StreamOutput::new("aggregate detail".to_string()),
|
||||||
|
duration: Duration::from_millis(10),
|
||||||
|
timed_out: false,
|
||||||
|
};
|
||||||
|
let err = CodexErr::Sandbox(SandboxErr::Denied {
|
||||||
|
output: Box::new(output),
|
||||||
|
});
|
||||||
|
assert_eq!(get_error_message_ui(&err), "aggregate detail");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sandbox_denied_reports_both_streams_when_available() {
|
||||||
|
let output = ExecToolCallOutput {
|
||||||
|
exit_code: 9,
|
||||||
|
stdout: StreamOutput::new("stdout detail".to_string()),
|
||||||
|
stderr: StreamOutput::new("stderr detail".to_string()),
|
||||||
|
aggregated_output: StreamOutput::new(String::new()),
|
||||||
|
duration: Duration::from_millis(10),
|
||||||
|
timed_out: false,
|
||||||
|
};
|
||||||
|
let err = CodexErr::Sandbox(SandboxErr::Denied {
|
||||||
|
output: Box::new(output),
|
||||||
|
});
|
||||||
|
assert_eq!(get_error_message_ui(&err), "stderr detail\nstdout detail");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sandbox_denied_reports_stdout_when_no_stderr() {
|
||||||
|
let output = ExecToolCallOutput {
|
||||||
|
exit_code: 11,
|
||||||
|
stdout: StreamOutput::new("stdout only".to_string()),
|
||||||
|
stderr: StreamOutput::new(String::new()),
|
||||||
|
aggregated_output: StreamOutput::new(String::new()),
|
||||||
|
duration: Duration::from_millis(8),
|
||||||
|
timed_out: false,
|
||||||
|
};
|
||||||
|
let err = CodexErr::Sandbox(SandboxErr::Denied {
|
||||||
|
output: Box::new(output),
|
||||||
|
});
|
||||||
|
assert_eq!(get_error_message_ui(&err), "stdout only");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sandbox_denied_reports_exit_code_when_no_output_available() {
|
||||||
|
let output = ExecToolCallOutput {
|
||||||
|
exit_code: 13,
|
||||||
|
stdout: StreamOutput::new(String::new()),
|
||||||
|
stderr: StreamOutput::new(String::new()),
|
||||||
|
aggregated_output: StreamOutput::new(String::new()),
|
||||||
|
duration: Duration::from_millis(5),
|
||||||
|
timed_out: false,
|
||||||
|
};
|
||||||
|
let err = CodexErr::Sandbox(SandboxErr::Denied {
|
||||||
|
output: Box::new(output),
|
||||||
|
});
|
||||||
|
assert_eq!(
|
||||||
|
get_error_message_ui(&err),
|
||||||
|
"command failed inside sandbox with exit code 13"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn usage_limit_reached_error_formats_free_plan() {
|
fn usage_limit_reached_error_formats_free_plan() {
|
||||||
let err = UsageLimitReachedError {
|
let err = UsageLimitReachedError {
|
||||||
|
|||||||
@@ -177,7 +177,7 @@ pub async fn process_exec_tool_call(
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
if exit_code != 0 && is_likely_sandbox_denied(sandbox_type, exit_code) {
|
if is_likely_sandbox_denied(sandbox_type, &exec_output) {
|
||||||
return Err(CodexErr::Sandbox(SandboxErr::Denied {
|
return Err(CodexErr::Sandbox(SandboxErr::Denied {
|
||||||
output: Box::new(exec_output),
|
output: Box::new(exec_output),
|
||||||
}));
|
}));
|
||||||
@@ -195,21 +195,57 @@ pub async fn process_exec_tool_call(
|
|||||||
/// We don't have a fully deterministic way to tell if our command failed
|
/// We don't have a fully deterministic way to tell if our command failed
|
||||||
/// because of the sandbox - a command in the user's zshrc file might hit an
|
/// because of the sandbox - a command in the user's zshrc file might hit an
|
||||||
/// error, but the command itself might fail or succeed for other reasons.
|
/// error, but the command itself might fail or succeed for other reasons.
|
||||||
/// For now, we conservatively check for 'command not found' (exit code 127),
|
/// For now, we conservatively check for well known command failure exit codes and
|
||||||
/// and can add additional cases as necessary.
|
/// also look for common sandbox denial keywords in the command output.
|
||||||
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exit_code: i32) -> bool {
|
fn is_likely_sandbox_denied(sandbox_type: SandboxType, exec_output: &ExecToolCallOutput) -> bool {
|
||||||
if sandbox_type == SandboxType::None {
|
if sandbox_type == SandboxType::None || exec_output.exit_code == 0 {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Quick rejects: well-known non-sandbox shell exit codes
|
// Quick rejects: well-known non-sandbox shell exit codes
|
||||||
// 127: command not found, 2: misuse of shell builtins
|
// 2: misuse of shell builtins
|
||||||
if exit_code == 127 {
|
// 126: permission denied
|
||||||
|
// 127: command not found
|
||||||
|
const QUICK_REJECT_EXIT_CODES: [i32; 3] = [2, 126, 127];
|
||||||
|
if QUICK_REJECT_EXIT_CODES.contains(&exec_output.exit_code) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// For all other cases, we assume the sandbox is the cause
|
const SANDBOX_DENIED_KEYWORDS: [&str; 6] = [
|
||||||
true
|
"operation not permitted",
|
||||||
|
"permission denied",
|
||||||
|
"read-only file system",
|
||||||
|
"seccomp",
|
||||||
|
"sandbox",
|
||||||
|
"landlock",
|
||||||
|
];
|
||||||
|
|
||||||
|
if [
|
||||||
|
&exec_output.stderr.text,
|
||||||
|
&exec_output.stdout.text,
|
||||||
|
&exec_output.aggregated_output.text,
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.any(|section| {
|
||||||
|
let lower = section.to_lowercase();
|
||||||
|
SANDBOX_DENIED_KEYWORDS
|
||||||
|
.iter()
|
||||||
|
.any(|needle| lower.contains(needle))
|
||||||
|
}) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
{
|
||||||
|
const SIGSYS_CODE: i32 = libc::SIGSYS;
|
||||||
|
if sandbox_type == SandboxType::LinuxSeccomp
|
||||||
|
&& exec_output.exit_code == EXIT_CODE_SIGNAL_BASE + SIGSYS_CODE
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@@ -436,3 +472,77 @@ fn synthetic_exit_status(code: i32) -> ExitStatus {
|
|||||||
#[expect(clippy::unwrap_used)]
|
#[expect(clippy::unwrap_used)]
|
||||||
std::process::ExitStatus::from_raw(code.try_into().unwrap())
|
std::process::ExitStatus::from_raw(code.try_into().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
fn make_exec_output(
|
||||||
|
exit_code: i32,
|
||||||
|
stdout: &str,
|
||||||
|
stderr: &str,
|
||||||
|
aggregated: &str,
|
||||||
|
) -> ExecToolCallOutput {
|
||||||
|
ExecToolCallOutput {
|
||||||
|
exit_code,
|
||||||
|
stdout: StreamOutput::new(stdout.to_string()),
|
||||||
|
stderr: StreamOutput::new(stderr.to_string()),
|
||||||
|
aggregated_output: StreamOutput::new(aggregated.to_string()),
|
||||||
|
duration: Duration::from_millis(1),
|
||||||
|
timed_out: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sandbox_detection_requires_keywords() {
|
||||||
|
let output = make_exec_output(1, "", "", "");
|
||||||
|
assert!(!is_likely_sandbox_denied(
|
||||||
|
SandboxType::LinuxSeccomp,
|
||||||
|
&output
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sandbox_detection_identifies_keyword_in_stderr() {
|
||||||
|
let output = make_exec_output(1, "", "Operation not permitted", "");
|
||||||
|
assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sandbox_detection_respects_quick_reject_exit_codes() {
|
||||||
|
let output = make_exec_output(127, "", "command not found", "");
|
||||||
|
assert!(!is_likely_sandbox_denied(
|
||||||
|
SandboxType::LinuxSeccomp,
|
||||||
|
&output
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sandbox_detection_ignores_non_sandbox_mode() {
|
||||||
|
let output = make_exec_output(1, "", "Operation not permitted", "");
|
||||||
|
assert!(!is_likely_sandbox_denied(SandboxType::None, &output));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sandbox_detection_uses_aggregated_output() {
|
||||||
|
let output = make_exec_output(
|
||||||
|
101,
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
"cargo failed: Read-only file system when writing target",
|
||||||
|
);
|
||||||
|
assert!(is_likely_sandbox_denied(
|
||||||
|
SandboxType::MacosSeatbelt,
|
||||||
|
&output
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(unix)]
|
||||||
|
#[test]
|
||||||
|
fn sandbox_detection_flags_sigsys_exit_code() {
|
||||||
|
let exit_code = EXIT_CODE_SIGNAL_BASE + libc::SIGSYS;
|
||||||
|
let output = make_exec_output(exit_code, "", "", "");
|
||||||
|
assert!(is_likely_sandbox_denied(SandboxType::LinuxSeccomp, &output));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -380,6 +380,23 @@ mod tests {
|
|||||||
assert_eq!(message, "failed in sandbox: sandbox stderr");
|
assert_eq!(message, "failed in sandbox: sandbox stderr");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn sandbox_failure_message_falls_back_to_aggregated_output() {
|
||||||
|
let output = ExecToolCallOutput {
|
||||||
|
exit_code: 101,
|
||||||
|
stdout: StreamOutput::new(String::new()),
|
||||||
|
stderr: StreamOutput::new(String::new()),
|
||||||
|
aggregated_output: StreamOutput::new("aggregate text".to_string()),
|
||||||
|
duration: Duration::from_millis(10),
|
||||||
|
timed_out: false,
|
||||||
|
};
|
||||||
|
let err = SandboxErr::Denied {
|
||||||
|
output: Box::new(output),
|
||||||
|
};
|
||||||
|
let message = sandbox_failure_message(err);
|
||||||
|
assert_eq!(message, "failed in sandbox: aggregate text");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn normalize_function_error_synthesizes_payload() {
|
fn normalize_function_error_synthesizes_payload() {
|
||||||
let err = FunctionCallError::RespondToModel("boom".to_string());
|
let err = FunctionCallError::RespondToModel("boom".to_string());
|
||||||
|
|||||||
@@ -403,79 +403,6 @@ async fn shell_timeout_includes_timeout_prefix_and_metadata() -> Result<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
|
||||||
async fn shell_sandbox_denied_truncates_error_output() -> Result<()> {
|
|
||||||
skip_if_no_network!(Ok(()));
|
|
||||||
|
|
||||||
let server = start_mock_server().await;
|
|
||||||
let mut builder = test_codex();
|
|
||||||
let test = builder.build(&server).await?;
|
|
||||||
|
|
||||||
let call_id = "shell-denied";
|
|
||||||
let long_line = "this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz";
|
|
||||||
let script = format!(
|
|
||||||
"for i in $(seq 1 500); do >&2 echo '{long_line}'; done; cat <<'EOF' > denied.txt\ncontent\nEOF",
|
|
||||||
);
|
|
||||||
let args = json!({
|
|
||||||
"command": ["/bin/sh", "-c", script],
|
|
||||||
"timeout_ms": 1_000,
|
|
||||||
});
|
|
||||||
|
|
||||||
mount_sse_once(
|
|
||||||
&server,
|
|
||||||
sse(vec![
|
|
||||||
ev_response_created("resp-1"),
|
|
||||||
ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
|
||||||
ev_completed("resp-1"),
|
|
||||||
]),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
let second_mock = mount_sse_once(
|
|
||||||
&server,
|
|
||||||
sse(vec![
|
|
||||||
ev_assistant_message("msg-1", "done"),
|
|
||||||
ev_completed("resp-2"),
|
|
||||||
]),
|
|
||||||
)
|
|
||||||
.await;
|
|
||||||
|
|
||||||
submit_turn(
|
|
||||||
&test,
|
|
||||||
"attempt to write in read-only sandbox",
|
|
||||||
AskForApproval::Never,
|
|
||||||
SandboxPolicy::ReadOnly,
|
|
||||||
)
|
|
||||||
.await?;
|
|
||||||
|
|
||||||
let denied_item = second_mock.single_request().function_call_output(call_id);
|
|
||||||
|
|
||||||
let output = denied_item
|
|
||||||
.get("output")
|
|
||||||
.and_then(Value::as_str)
|
|
||||||
.expect("denied output string");
|
|
||||||
|
|
||||||
let sandbox_pattern = r#"(?s)^Exit code: -?\d+
|
|
||||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
|
||||||
Total output lines: \d+
|
|
||||||
Output:
|
|
||||||
|
|
||||||
failed in sandbox: .*?(?:Operation not permitted|Permission denied|Read-only file system).*?
|
|
||||||
\[\.{3} omitted \d+ of \d+ lines \.{3}\]
|
|
||||||
.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
|
|
||||||
\n?$"#;
|
|
||||||
let sandbox_regex = Regex::new(sandbox_pattern)?;
|
|
||||||
if !sandbox_regex.is_match(output) {
|
|
||||||
let fallback_pattern = r#"(?s)^Total output lines: \d+
|
|
||||||
|
|
||||||
failed in sandbox: this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz
|
|
||||||
.*this is a long stderr line that should trigger truncation 0123456789abcdefghijklmnopqrstuvwxyz.*
|
|
||||||
.*(?:Operation not permitted|Permission denied|Read-only file system).*$"#;
|
|
||||||
assert_regex_match(fallback_pattern, output);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||||
async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
|
async fn shell_spawn_failure_truncates_exec_error() -> Result<()> {
|
||||||
skip_if_no_network!(Ok(()));
|
skip_if_no_network!(Ok(()));
|
||||||
|
|||||||
Reference in New Issue
Block a user