Added model summary and risk assessment for commands that violate sandbox policy (#5536)

This PR adds support for a model-based summary and risk assessment for commands that violate the sandbox policy and require user approval. This helps the user evaluate whether the command should be approved. The feature works by passing the failed command back to the model and asking it to summarize the command and assign it a risk level (low, medium, high) and a risk category (e.g. "data deletion" or "data exfiltration"). It uses a new conversation thread so that the context in the existing thread doesn't influence the answer. If the call to the model fails or takes longer than 5 seconds, it falls back to the current behavior. For now, this is an experimental feature, gated by the config key `experimental_sandbox_command_assessment`. Here is a screenshot of the approval prompt showing the risk assessment and summary. <img width="723" height="282" alt="image" src="https://github.com/user-attachments/assets/4597dd7c-d5a0-4e9f-9d13-414bd082fd6b" />
2025-10-24 17:23:44 -05:00
parent a4be4d78b9
commit f8af4f5c8d
31 changed files with 701 additions and 34 deletions
--- a/codex-rs/tui/src/bottom_pane/approval_overlay.rs
+++ b/codex-rs/tui/src/bottom_pane/approval_overlay.rs
@@ -19,6 +19,9 @@ use crate::render::renderable::Renderable;
 use codex_core::protocol::FileChange;
 use codex_core::protocol::Op;
 use codex_core::protocol::ReviewDecision;
+use codex_core::protocol::SandboxCommandAssessment;
+use codex_core::protocol::SandboxRiskCategory;
+use codex_core::protocol::SandboxRiskLevel;
 use crossterm::event::KeyCode;
 use crossterm::event::KeyEvent;
 use crossterm::event::KeyEventKind;
@@ -38,6 +41,7 @@ pub(crate) enum ApprovalRequest {
        id: String,
        command: Vec<String>,
        reason: Option<String>,
+        risk: Option<SandboxCommandAssessment>,
    },
    ApplyPatch {
        id: String,
@@ -285,12 +289,17 @@ impl From<ApprovalRequest> for ApprovalRequestState {
                id,
                command,
                reason,
+                risk,
            } => {
+                let reason = reason.filter(|item| !item.is_empty());
+                let has_reason = reason.is_some();
                let mut header: Vec<Line<'static>> = Vec::new();
-                if let Some(reason) = reason
-                    && !reason.is_empty()
-                {
+                if let Some(reason) = reason {
                    header.push(Line::from(vec!["Reason: ".into(), reason.italic()]));
+                }
+                if let Some(risk) = risk.as_ref() {
+                    header.extend(render_risk_lines(risk));
+                } else if has_reason {
                    header.push(Line::from(""));
                }
                let full_cmd = strip_bash_lc_and_escape(&command);
@@ -330,6 +339,52 @@ impl From<ApprovalRequest> for ApprovalRequestState {
    }
 }

+fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec<Line<'static>> { // Builds header lines for an assessment: an optional "Summary: " line, a "Risk: " line, then a blank line.
+    let level_span = match risk.risk_level { // Bold, color-coded label for the risk level.
+        SandboxRiskLevel::Low => "LOW".green().bold(),
+        SandboxRiskLevel::Medium => "MEDIUM".cyan().bold(),
+        SandboxRiskLevel::High => "HIGH".red().bold(),
+    };
+
+    let mut lines = Vec::new();
+
+    let description = risk.description.trim(); // Surrounding whitespace is dropped before display.
+    if !description.is_empty() { // Omit the summary line when the description is empty or whitespace-only.
+        lines.push(Line::from(vec![
+            "Summary: ".into(),
+            description.to_string().into(), // Owned copy: the returned lines are `'static` and cannot borrow from `risk`.
+        ]));
+    }
+
+    let mut spans: Vec<Span<'static>> = vec!["Risk: ".into(), level_span]; // The risk line is always emitted, even without categories.
+    if !risk.risk_categories.is_empty() { // Append " (a, b, ...)" only when at least one category is present.
+        spans.push(" (".into());
+        for (idx, category) in risk.risk_categories.iter().enumerate() {
+            if idx > 0 { // Comma-separate every category after the first.
+                spans.push(", ".into());
+            }
+            spans.push(risk_category_label(*category).into());
+        }
+        spans.push(")".into());
+    }
+
+    lines.push(Line::from(spans));
+    lines.push(Line::from("")); // Trailing blank line; presumably separates the risk lines from what the caller renders next.
+    lines
+}
+
+fn risk_category_label(category: SandboxRiskCategory) -> &'static str { // Maps a risk category to the lowercase label shown in the "Risk: " line.
+    match category { // No wildcard arm, so the compiler requires each variant to be mapped here.
+        SandboxRiskCategory::DataDeletion => "data deletion",
+        SandboxRiskCategory::DataExfiltration => "data exfiltration",
+        SandboxRiskCategory::PrivilegeEscalation => "privilege escalation",
+        SandboxRiskCategory::SystemModification => "system modification",
+        SandboxRiskCategory::NetworkAccess => "network access",
+        SandboxRiskCategory::ResourceExhaustion => "resource exhaustion",
+        SandboxRiskCategory::Compliance => "compliance",
+    }
+}
+
 #[derive(Clone)]
 enum ApprovalVariant {
    Exec { id: String, command: Vec<String> },
@@ -404,6 +459,7 @@ mod tests {
            id: "test".to_string(),
            command: vec!["echo".to_string(), "hi".to_string()],
            reason: Some("reason".to_string()),
+            risk: None,
        }
    }

@@ -445,6 +501,7 @@ mod tests {
            id: "test".into(),
            command,
            reason: None,
+            risk: None,
        };

        let view = ApprovalOverlay::new(exec_request, tx);
--- a/codex-rs/tui/src/bottom_pane/mod.rs
+++ b/codex-rs/tui/src/bottom_pane/mod.rs
@@ -557,6 +557,7 @@ mod tests {
            id: "1".to_string(),
            command: vec!["echo".into(), "ok".into()],
            reason: None,
+            risk: None,
        }
    }

--- a/codex-rs/tui/src/chatwidget.rs
+++ b/codex-rs/tui/src/chatwidget.rs
@@ -777,6 +777,7 @@ impl ChatWidget {
            id,
            command: ev.command,
            reason: ev.reason,
+            risk: ev.risk,
        };
        self.bottom_pane.push_approval_request(request);
        self.request_redraw();
--- a/codex-rs/tui/src/chatwidget/tests.rs
+++ b/codex-rs/tui/src/chatwidget/tests.rs
@@ -402,6 +402,7 @@ fn exec_approval_emits_proposed_command_and_decision_history() {
        reason: Some(
            "this is a test reason such as one that would be produced by the model".into(),
        ),
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
@@ -444,6 +445,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
        reason: Some(
            "this is a test reason such as one that would be produced by the model".into(),
        ),
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
@@ -492,6 +494,7 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
        command: vec!["bash".into(), "-lc".into(), long],
        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
        reason: None,
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
@@ -1421,6 +1424,7 @@ fn approval_modal_exec_snapshot() {
        reason: Some(
            "this is a test reason such as one that would be produced by the model".into(),
        ),
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
@@ -1465,6 +1469,7 @@ fn approval_modal_exec_without_reason_snapshot() {
        command: vec!["bash".into(), "-lc".into(), "echo hello world".into()],
        cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
        reason: None,
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
@@ -1675,6 +1680,7 @@ fn status_widget_and_approval_modal_snapshot() {
        reason: Some(
            "this is a test reason such as one that would be produced by the model".into(),
        ),
+        risk: None,
        parsed_cmd: vec![],
    };
    chat.handle_codex_event(Event {
--- a/codex-rs/tui/src/lib.rs
+++ b/codex-rs/tui/src/lib.rs
@@ -148,6 +148,7 @@ pub async fn run_main(
        include_view_image_tool: None,
        show_raw_agent_reasoning: cli.oss.then_some(true),
        tools_web_search_request: cli.web_search.then_some(true),
+        experimental_sandbox_command_assessment: None,
        additional_writable_roots: additional_dirs,
    };
    let raw_overrides = cli.config_overrides.raw_overrides.clone();