Files
llmx/codex-rs/mcp-server/src/exec_approval.rs
Eric Traut f8af4f5c8d Added model summary and risk assessment for commands that violate sandbox policy (#5536)
This PR adds support for a model-based summary and risk assessment for
commands that violate the sandbox policy and require user approval. This
aids the user in evaluating whether the command should be approved.

The feature works by taking a failed command and passing it back to the
model and asking it to summarize the command, give it a risk level (low,
medium, high) and a risk category (e.g. "data deletion" or "data
exfiltration"). It uses a new conversation thread so the context in the
existing thread doesn't influence the answer. If the call to the model
fails or takes longer than 5 seconds, it falls back to the current
behavior.

For now, this is an experimental feature and is gated by a config key
`experimental_sandbox_command_assessment`.

Here is a screen shot of the approval prompt showing the risk assessment
and summary.

<img width="723" height="282" alt="image"
src="https://github.com/user-attachments/assets/4597dd7c-d5a0-4e9f-9d13-414bd082fd6b"
/>
2025-10-24 15:23:44 -07:00

159 lines
5.1 KiB
Rust

use std::path::PathBuf;
use std::sync::Arc;
use codex_core::CodexConversation;
use codex_core::protocol::Op;
use codex_core::protocol::ReviewDecision;
use codex_core::protocol::SandboxCommandAssessment;
use codex_protocol::parse_command::ParsedCommand;
use mcp_types::ElicitRequest;
use mcp_types::ElicitRequestParamsRequestedSchema;
use mcp_types::JSONRPCErrorError;
use mcp_types::ModelContextProtocolRequest;
use mcp_types::RequestId;
use serde::Deserialize;
use serde::Serialize;
use serde_json::json;
use tracing::error;
use crate::codex_tool_runner::INVALID_PARAMS_ERROR_CODE;
/// Conforms to [`mcp_types::ElicitRequestParams`] so that it can be used as the
/// `params` field of an [`ElicitRequest`].
#[derive(Debug, Deserialize, Serialize)]
pub struct ExecApprovalElicitRequestParams {
// These fields are required so that `params`
// conforms to ElicitRequestParams.
pub message: String,
#[serde(rename = "requestedSchema")]
pub requested_schema: ElicitRequestParamsRequestedSchema,
// These are additional fields the client can use to
// correlate the request with the codex tool call.
pub codex_elicitation: String,
pub codex_mcp_tool_call_id: String,
pub codex_event_id: String,
pub codex_call_id: String,
pub codex_command: Vec<String>,
pub codex_cwd: PathBuf,
pub codex_parsed_cmd: Vec<ParsedCommand>,
#[serde(skip_serializing_if = "Option::is_none")]
pub codex_risk: Option<SandboxCommandAssessment>,
}
// TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See:
// - https://github.com/modelcontextprotocol/modelcontextprotocol/blob/f962dc1780fa5eed7fb7c8a0232f1fc83ef220cd/schema/2025-06-18/schema.json#L617-L636
// - https://modelcontextprotocol.io/specification/draft/client/elicitation#protocol-messages
// It should have "action" and "content" fields.
#[derive(Debug, Serialize, Deserialize)]
pub struct ExecApprovalResponse {
pub decision: ReviewDecision,
}
#[allow(clippy::too_many_arguments)]
pub(crate) async fn handle_exec_approval_request(
command: Vec<String>,
cwd: PathBuf,
outgoing: Arc<crate::outgoing_message::OutgoingMessageSender>,
codex: Arc<CodexConversation>,
request_id: RequestId,
tool_call_id: String,
event_id: String,
call_id: String,
codex_parsed_cmd: Vec<ParsedCommand>,
codex_risk: Option<SandboxCommandAssessment>,
) {
let escaped_command =
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
let message = format!(
"Allow Codex to run `{escaped_command}` in `{cwd}`?",
cwd = cwd.to_string_lossy()
);
let params = ExecApprovalElicitRequestParams {
message,
requested_schema: ElicitRequestParamsRequestedSchema {
r#type: "object".to_string(),
properties: json!({}),
required: None,
},
codex_elicitation: "exec-approval".to_string(),
codex_mcp_tool_call_id: tool_call_id.clone(),
codex_event_id: event_id.clone(),
codex_call_id: call_id,
codex_command: command,
codex_cwd: cwd,
codex_parsed_cmd,
codex_risk,
};
let params_json = match serde_json::to_value(&params) {
Ok(value) => value,
Err(err) => {
let message = format!("Failed to serialize ExecApprovalElicitRequestParams: {err}");
error!("{message}");
outgoing
.send_error(
request_id.clone(),
JSONRPCErrorError {
code: INVALID_PARAMS_ERROR_CODE,
message,
data: None,
},
)
.await;
return;
}
};
let on_response = outgoing
.send_request(ElicitRequest::METHOD, Some(params_json))
.await;
// Listen for the response on a separate task so we don't block the main agent loop.
{
let codex = codex.clone();
let event_id = event_id.clone();
tokio::spawn(async move {
on_exec_approval_response(event_id, on_response, codex).await;
});
}
}
async fn on_exec_approval_response(
event_id: String,
receiver: tokio::sync::oneshot::Receiver<mcp_types::Result>,
codex: Arc<CodexConversation>,
) {
let response = receiver.await;
let value = match response {
Ok(value) => value,
Err(err) => {
error!("request failed: {err:?}");
return;
}
};
// Try to deserialize `value` and then make the appropriate call to `codex`.
let response = serde_json::from_value::<ExecApprovalResponse>(value).unwrap_or_else(|err| {
error!("failed to deserialize ExecApprovalResponse: {err}");
// If we cannot deserialize the response, we deny the request to be
// conservative.
ExecApprovalResponse {
decision: ReviewDecision::Denied,
}
});
if let Err(err) = codex
.submit(Op::ExecApproval {
id: event_id,
decision: response.decision,
})
.await
{
error!("failed to submit ExecApproval: {err}");
}
}