test: add integration test for MCP server (#1633)
This PR introduces a single integration test for `codex mcp`, though it also introduces a number of reusable components so that it should be easier to introduce more integration tests going forward. The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs` and the reusable pieces are in `codex-rs/mcp-server/tests/common`. The test itself verifies new functionality around elicitations introduced in https://github.com/openai/codex/pull/1623 (and the fix introduced in https://github.com/openai/codex/pull/1629) by doing the following: - starts a mock model provider with canned responses for `/v1/chat/completions` - starts the MCP server with a `config.toml` to use that model provider (and `approval_policy = "untrusted"`) - sends the `codex` tool call which causes the mock model provider to request a shell call for `git init` - the MCP server sends an elicitation to the client to approve the request - the client replies to the elicitation with `"approved"` - the MCP server runs the command and re-samples the model, getting a `"finish_reason": "stop"` - in turn, the MCP server sends the final response to the original `codex` tool call - verifies that `git init` ran as expected To test: ``` cargo test shell_command_approval_triggers_elicitation ``` In writing this test, I discovered that `ExecApprovalResponse` does not conform to `ElicitResult`, so I added a TODO to fix that, since I think that should be updated in a separate PR. As it stands, this PR does not update any business logic, though it does make a number of members of the `mcp-server` crate `pub` so they can be used in the test. One additional learning from this PR is that `cargo_bin()`, which comes from an extension trait in the `assert_cmd` crate, is only available for `std::process::Command`, but we really want to use `tokio::process::Command` so that everything is async and we can leverage utilities like `tokio::time::timeout()`. 
The trick I came up with was to use `cargo_bin()` to locate the program, and then to use `std::process::Command::get_program()` when constructing the `tokio::process::Command`.
This commit is contained in:
4
codex-rs/Cargo.lock
generated
4
codex-rs/Cargo.lock
generated
@@ -792,6 +792,7 @@ name = "codex-mcp-server"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
"codex-core",
|
||||
"codex-linux-sandbox",
|
||||
"mcp-types",
|
||||
@@ -800,10 +801,13 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"shlex",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
"tokio-test",
|
||||
"toml 0.9.1",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
"wiremock",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
@@ -35,4 +35,8 @@ tokio = { version = "1", features = [
|
||||
] }
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "2"
|
||||
pretty_assertions = "1.4.1"
|
||||
tempfile = "3"
|
||||
tokio-test = "0.4"
|
||||
wiremock = "0.6"
|
||||
|
||||
@@ -7,15 +7,16 @@ use mcp_types::ToolInputSchema;
|
||||
use schemars::JsonSchema;
|
||||
use schemars::r#gen::SchemaSettings;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use crate::json_to_toml::json_to_toml;
|
||||
|
||||
/// Client-supplied configuration for a `codex` tool-call.
|
||||
#[derive(Debug, Clone, Deserialize, JsonSchema)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub(crate) struct CodexToolCallParam {
|
||||
pub struct CodexToolCallParam {
|
||||
/// The *initial user prompt* to start the Codex conversation.
|
||||
pub prompt: String,
|
||||
|
||||
@@ -49,9 +50,9 @@ pub(crate) struct CodexToolCallParam {
|
||||
|
||||
/// Custom enum mirroring [`AskForApproval`], but has an extra dependency on
|
||||
/// [`JsonSchema`].
|
||||
#[derive(Debug, Clone, Deserialize, JsonSchema)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub(crate) enum CodexToolCallApprovalPolicy {
|
||||
pub enum CodexToolCallApprovalPolicy {
|
||||
Untrusted,
|
||||
OnFailure,
|
||||
Never,
|
||||
@@ -69,9 +70,9 @@ impl From<CodexToolCallApprovalPolicy> for AskForApproval {
|
||||
|
||||
/// Custom enum mirroring [`SandboxMode`] from config_types.rs, but with
|
||||
/// `JsonSchema` support.
|
||||
#[derive(Debug, Clone, Deserialize, JsonSchema)]
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
pub(crate) enum CodexToolCallSandboxMode {
|
||||
pub enum CodexToolCallSandboxMode {
|
||||
ReadOnly,
|
||||
WorkspaceWrite,
|
||||
DangerFullAccess,
|
||||
|
||||
@@ -100,7 +100,10 @@ pub async fn run_codex_tool_session(
|
||||
}) => {
|
||||
let escaped_command = shlex::try_join(command.iter().map(|s| s.as_str()))
|
||||
.unwrap_or_else(|_| command.join(" "));
|
||||
let message = format!("Allow Codex to run `{escaped_command}` in {cwd:?}?");
|
||||
let message = format!(
|
||||
"Allow Codex to run `{escaped_command}` in `{cwd}`?",
|
||||
cwd = cwd.to_string_lossy()
|
||||
);
|
||||
|
||||
let params = ExecApprovalElicitRequestParams {
|
||||
message,
|
||||
@@ -276,7 +279,12 @@ async fn on_exec_approval_response(
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
// TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See:
|
||||
// - https://github.com/modelcontextprotocol/modelcontextprotocol/blob/f962dc1780fa5eed7fb7c8a0232f1fc83ef220cd/schema/2025-06-18/schema.json#L617-L636
|
||||
// - https://modelcontextprotocol.io/specification/draft/client/elicitation#protocol-messages
|
||||
// It should have "action" and "content" fields.
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct ExecApprovalResponse {
|
||||
pub decision: ReviewDecision,
|
||||
}
|
||||
@@ -284,19 +292,19 @@ pub struct ExecApprovalResponse {
|
||||
/// Conforms to [`mcp_types::ElicitRequestParams`] so that it can be used as the
|
||||
/// `params` field of an [`mcp_types::ElicitRequest`].
|
||||
#[derive(Debug, Serialize)]
|
||||
struct ExecApprovalElicitRequestParams {
|
||||
pub struct ExecApprovalElicitRequestParams {
|
||||
// These fields are required so that `params`
|
||||
// conforms to ElicitRequestParams.
|
||||
message: String,
|
||||
pub message: String,
|
||||
|
||||
#[serde(rename = "requestedSchema")]
|
||||
requested_schema: ElicitRequestParamsRequestedSchema,
|
||||
pub requested_schema: ElicitRequestParamsRequestedSchema,
|
||||
|
||||
// These are additional fields the client can use to
|
||||
// correlate the request with the codex tool call.
|
||||
codex_elicitation: String,
|
||||
codex_mcp_tool_call_id: String,
|
||||
codex_event_id: String,
|
||||
codex_command: Vec<String>,
|
||||
codex_cwd: PathBuf,
|
||||
pub codex_elicitation: String,
|
||||
pub codex_mcp_tool_call_id: String,
|
||||
pub codex_event_id: String,
|
||||
pub codex_command: Vec<String>,
|
||||
pub codex_cwd: PathBuf,
|
||||
}
|
||||
|
||||
@@ -24,6 +24,10 @@ use crate::message_processor::MessageProcessor;
|
||||
use crate::outgoing_message::OutgoingMessage;
|
||||
use crate::outgoing_message::OutgoingMessageSender;
|
||||
|
||||
pub use crate::codex_tool_config::CodexToolCallParam;
|
||||
pub use crate::codex_tool_runner::ExecApprovalElicitRequestParams;
|
||||
pub use crate::codex_tool_runner::ExecApprovalResponse;
|
||||
|
||||
/// Size of the bounded channels used to communicate between tasks. The value
|
||||
/// is a balance between throughput and memory usage – 128 messages should be
|
||||
/// plenty for an interactive CLI.
|
||||
|
||||
@@ -185,7 +185,7 @@ impl MessageProcessor {
|
||||
protocol_version: params.protocol_version.clone(),
|
||||
server_info: mcp_types::Implementation {
|
||||
name: "codex-mcp-server".to_string(),
|
||||
version: mcp_types::MCP_SCHEMA_VERSION.to_string(),
|
||||
version: env!("CARGO_PKG_VERSION").to_string(),
|
||||
title: Some("Codex".to_string()),
|
||||
},
|
||||
};
|
||||
|
||||
255
codex-rs/mcp-server/tests/common/mcp_process.rs
Normal file
255
codex-rs/mcp-server/tests/common/mcp_process.rs
Normal file
@@ -0,0 +1,255 @@
|
||||
use std::path::Path;
|
||||
use std::process::Stdio;
|
||||
use std::sync::atomic::AtomicI64;
|
||||
use std::sync::atomic::Ordering;
|
||||
use tokio::io::AsyncBufReadExt;
|
||||
use tokio::io::AsyncWriteExt;
|
||||
use tokio::io::BufReader;
|
||||
use tokio::process::Child;
|
||||
use tokio::process::ChildStdin;
|
||||
use tokio::process::ChildStdout;
|
||||
|
||||
use anyhow::Context;
|
||||
use assert_cmd::prelude::*;
|
||||
use codex_mcp_server::CodexToolCallParam;
|
||||
use mcp_types::CallToolRequestParams;
|
||||
use mcp_types::ClientCapabilities;
|
||||
use mcp_types::Implementation;
|
||||
use mcp_types::InitializeRequestParams;
|
||||
use mcp_types::JSONRPC_VERSION;
|
||||
use mcp_types::JSONRPCMessage;
|
||||
use mcp_types::JSONRPCNotification;
|
||||
use mcp_types::JSONRPCRequest;
|
||||
use mcp_types::JSONRPCResponse;
|
||||
use mcp_types::ModelContextProtocolNotification;
|
||||
use mcp_types::ModelContextProtocolRequest;
|
||||
use mcp_types::RequestId;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use std::process::Command as StdCommand;
|
||||
use tokio::process::Command;
|
||||
|
||||
pub struct McpProcess {
|
||||
next_request_id: AtomicI64,
|
||||
/// Retain this child process until the client is dropped. The Tokio runtime
|
||||
/// will make a "best effort" to reap the process after it exits, but it is
|
||||
/// not a guarantee. See the `kill_on_drop` documentation for details.
|
||||
#[allow(dead_code)]
|
||||
process: Child,
|
||||
stdin: ChildStdin,
|
||||
stdout: BufReader<ChildStdout>,
|
||||
}
|
||||
|
||||
impl McpProcess {
|
||||
pub async fn new(codex_home: &Path) -> anyhow::Result<Self> {
|
||||
// Use assert_cmd to locate the binary path and then switch to tokio::process::Command
|
||||
let std_cmd = StdCommand::cargo_bin("codex-mcp-server")
|
||||
.context("should find binary for codex-mcp-server")?;
|
||||
|
||||
let program = std_cmd.get_program().to_owned();
|
||||
|
||||
let mut cmd = Command::new(program);
|
||||
|
||||
cmd.stdin(Stdio::piped());
|
||||
cmd.stdout(Stdio::piped());
|
||||
cmd.env("CODEX_HOME", codex_home);
|
||||
cmd.env("RUST_LOG", "debug");
|
||||
|
||||
let mut process = cmd
|
||||
.kill_on_drop(true)
|
||||
.spawn()
|
||||
.context("codex-mcp-server proc should start")?;
|
||||
let stdin = process
|
||||
.stdin
|
||||
.take()
|
||||
.ok_or_else(|| anyhow::format_err!("mcp should have stdin fd"))?;
|
||||
let stdout = process
|
||||
.stdout
|
||||
.take()
|
||||
.ok_or_else(|| anyhow::format_err!("mcp should have stdout fd"))?;
|
||||
let stdout = BufReader::new(stdout);
|
||||
Ok(Self {
|
||||
next_request_id: AtomicI64::new(0),
|
||||
process,
|
||||
stdin,
|
||||
stdout,
|
||||
})
|
||||
}
|
||||
|
||||
/// Performs the initialization handshake with the MCP server.
|
||||
pub async fn initialize(&mut self) -> anyhow::Result<()> {
|
||||
let request_id = self.next_request_id.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
let params = InitializeRequestParams {
|
||||
capabilities: ClientCapabilities {
|
||||
elicitation: Some(json!({})),
|
||||
experimental: None,
|
||||
roots: None,
|
||||
sampling: None,
|
||||
},
|
||||
client_info: Implementation {
|
||||
name: "elicitation test".into(),
|
||||
title: Some("Elicitation Test".into()),
|
||||
version: "0.0.0".into(),
|
||||
},
|
||||
protocol_version: mcp_types::MCP_SCHEMA_VERSION.into(),
|
||||
};
|
||||
let params_value = serde_json::to_value(params)?;
|
||||
|
||||
self.send_jsonrpc_message(JSONRPCMessage::Request(JSONRPCRequest {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: RequestId::Integer(request_id),
|
||||
method: mcp_types::InitializeRequest::METHOD.into(),
|
||||
params: Some(params_value),
|
||||
}))
|
||||
.await?;
|
||||
|
||||
let initialized = self.read_jsonrpc_message().await?;
|
||||
assert_eq!(
|
||||
JSONRPCMessage::Response(JSONRPCResponse {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: RequestId::Integer(request_id),
|
||||
result: json!({
|
||||
"capabilities": {
|
||||
"tools": {
|
||||
"listChanged": true
|
||||
},
|
||||
},
|
||||
"serverInfo": {
|
||||
"name": "codex-mcp-server",
|
||||
"title": "Codex",
|
||||
"version": "0.0.0"
|
||||
},
|
||||
"protocolVersion": mcp_types::MCP_SCHEMA_VERSION
|
||||
})
|
||||
}),
|
||||
initialized
|
||||
);
|
||||
|
||||
// Send notifications/initialized to ack the response.
|
||||
self.send_jsonrpc_message(JSONRPCMessage::Notification(JSONRPCNotification {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
method: mcp_types::InitializedNotification::METHOD.into(),
|
||||
params: None,
|
||||
}))
|
||||
.await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the id used to make the request so it can be used when
|
||||
/// correlating notifications.
|
||||
pub async fn send_codex_tool_call(&mut self, prompt: &str) -> anyhow::Result<i64> {
|
||||
let codex_tool_call_params = CallToolRequestParams {
|
||||
name: "codex".to_string(),
|
||||
arguments: Some(serde_json::to_value(CodexToolCallParam {
|
||||
prompt: prompt.to_string(),
|
||||
model: None,
|
||||
profile: None,
|
||||
cwd: None,
|
||||
approval_policy: None,
|
||||
sandbox: None,
|
||||
config: None,
|
||||
})?),
|
||||
};
|
||||
self.send_request(
|
||||
mcp_types::CallToolRequest::METHOD,
|
||||
Some(serde_json::to_value(codex_tool_call_params)?),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn send_request(
|
||||
&mut self,
|
||||
method: &str,
|
||||
params: Option<serde_json::Value>,
|
||||
) -> anyhow::Result<i64> {
|
||||
let request_id = self.next_request_id.fetch_add(1, Ordering::Relaxed);
|
||||
|
||||
let message = JSONRPCMessage::Request(JSONRPCRequest {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: RequestId::Integer(request_id),
|
||||
method: method.to_string(),
|
||||
params,
|
||||
});
|
||||
self.send_jsonrpc_message(message).await?;
|
||||
Ok(request_id)
|
||||
}
|
||||
|
||||
pub async fn send_response(
|
||||
&mut self,
|
||||
id: RequestId,
|
||||
result: serde_json::Value,
|
||||
) -> anyhow::Result<()> {
|
||||
self.send_jsonrpc_message(JSONRPCMessage::Response(JSONRPCResponse {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id,
|
||||
result,
|
||||
}))
|
||||
.await
|
||||
}
|
||||
|
||||
async fn send_jsonrpc_message(&mut self, message: JSONRPCMessage) -> anyhow::Result<()> {
|
||||
let payload = serde_json::to_string(&message)?;
|
||||
self.stdin.write_all(payload.as_bytes()).await?;
|
||||
self.stdin.write_all(b"\n").await?;
|
||||
self.stdin.flush().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn read_jsonrpc_message(&mut self) -> anyhow::Result<JSONRPCMessage> {
|
||||
let mut line = String::new();
|
||||
self.stdout.read_line(&mut line).await?;
|
||||
let message = serde_json::from_str::<JSONRPCMessage>(&line)?;
|
||||
Ok(message)
|
||||
}
|
||||
|
||||
pub async fn read_stream_until_request_message(&mut self) -> anyhow::Result<JSONRPCRequest> {
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
eprint!("message: {message:?}");
|
||||
|
||||
match message {
|
||||
JSONRPCMessage::Notification(_) => {
|
||||
eprintln!("notification: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Request(jsonrpc_request) => {
|
||||
return Ok(jsonrpc_request);
|
||||
}
|
||||
JSONRPCMessage::Error(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Response(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Response: {message:?}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn read_stream_until_response_message(
|
||||
&mut self,
|
||||
request_id: RequestId,
|
||||
) -> anyhow::Result<JSONRPCResponse> {
|
||||
loop {
|
||||
let message = self.read_jsonrpc_message().await?;
|
||||
eprint!("message: {message:?}");
|
||||
|
||||
match message {
|
||||
JSONRPCMessage::Notification(_) => {
|
||||
eprintln!("notification: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Request(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Error(_) => {
|
||||
anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
|
||||
}
|
||||
JSONRPCMessage::Response(jsonrpc_response) => {
|
||||
if jsonrpc_response.id == request_id {
|
||||
return Ok(jsonrpc_response);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
47
codex-rs/mcp-server/tests/common/mock_model_server.rs
Normal file
47
codex-rs/mcp-server/tests/common/mock_model_server.rs
Normal file
@@ -0,0 +1,47 @@
|
||||
use std::sync::atomic::AtomicUsize;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use wiremock::Mock;
|
||||
use wiremock::MockServer;
|
||||
use wiremock::Respond;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path;
|
||||
|
||||
/// Create a mock server that will provide the responses, in order, for
|
||||
/// requests to the `/v1/chat/completions` endpoint.
|
||||
pub async fn create_mock_chat_completions_server(responses: Vec<String>) -> MockServer {
|
||||
let server = MockServer::start().await;
|
||||
|
||||
let num_calls = responses.len();
|
||||
let seq_responder = SeqResponder {
|
||||
num_calls: AtomicUsize::new(0),
|
||||
responses,
|
||||
};
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/v1/chat/completions"))
|
||||
.respond_with(seq_responder)
|
||||
.expect(num_calls as u64)
|
||||
.mount(&server)
|
||||
.await;
|
||||
|
||||
server
|
||||
}
|
||||
|
||||
/// Responder that hands out a fixed list of response bodies, one per call.
struct SeqResponder {
    /// How many requests have been answered so far; doubles as the index of
    /// the next entry of `responses` to serve.
    num_calls: AtomicUsize,
    /// Response bodies to serve, in order.
    responses: Vec<String>,
}
|
||||
|
||||
impl Respond for SeqResponder {
|
||||
fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
|
||||
let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
|
||||
match self.responses.get(call_num) {
|
||||
Some(response) => ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_raw(response.clone(), "text/event-stream"),
|
||||
None => panic!("no response for {call_num}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
8
codex-rs/mcp-server/tests/common/mod.rs
Normal file
8
codex-rs/mcp-server/tests/common/mod.rs
Normal file
@@ -0,0 +1,8 @@
|
||||
mod mcp_process;
|
||||
mod mock_model_server;
|
||||
mod responses;
|
||||
|
||||
pub use mcp_process::McpProcess;
|
||||
pub use mock_model_server::create_mock_chat_completions_server;
|
||||
pub use responses::create_final_assistant_message_sse_response;
|
||||
pub use responses::create_shell_sse_response;
|
||||
59
codex-rs/mcp-server/tests/common/responses.rs
Normal file
59
codex-rs/mcp-server/tests/common/responses.rs
Normal file
@@ -0,0 +1,59 @@
|
||||
use serde_json::json;
|
||||
use std::path::Path;
|
||||
|
||||
pub fn create_shell_sse_response(
|
||||
command: Vec<String>,
|
||||
workdir: Option<&Path>,
|
||||
timeout_ms: Option<u64>,
|
||||
call_id: &str,
|
||||
) -> anyhow::Result<String> {
|
||||
// The `arguments`` for the `shell` tool is a serialized JSON object.
|
||||
let tool_call_arguments = serde_json::to_string(&json!({
|
||||
"command": command,
|
||||
"workdir": workdir.map(|w| w.to_string_lossy()),
|
||||
"timeout": timeout_ms
|
||||
}))?;
|
||||
let tool_call = json!({
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": call_id,
|
||||
"function": {
|
||||
"name": "shell",
|
||||
"arguments": tool_call_arguments
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": "tool_calls"
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
let sse = format!(
|
||||
"data: {}\n\ndata: DONE\n\n",
|
||||
serde_json::to_string(&tool_call)?
|
||||
);
|
||||
Ok(sse)
|
||||
}
|
||||
|
||||
pub fn create_final_assistant_message_sse_response(message: &str) -> anyhow::Result<String> {
|
||||
let assistant_message = json!({
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": message
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}
|
||||
]
|
||||
});
|
||||
|
||||
let sse = format!(
|
||||
"data: {}\n\ndata: DONE\n\n",
|
||||
serde_json::to_string(&assistant_message)?
|
||||
);
|
||||
Ok(sse)
|
||||
}
|
||||
195
codex-rs/mcp-server/tests/elicitation.rs
Normal file
195
codex-rs/mcp-server/tests/elicitation.rs
Normal file
@@ -0,0 +1,195 @@
|
||||
mod common;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_mcp_server::ExecApprovalElicitRequestParams;
|
||||
use codex_mcp_server::ExecApprovalResponse;
|
||||
use mcp_types::ElicitRequest;
|
||||
use mcp_types::ElicitRequestParamsRequestedSchema;
|
||||
use mcp_types::JSONRPC_VERSION;
|
||||
use mcp_types::JSONRPCRequest;
|
||||
use mcp_types::JSONRPCResponse;
|
||||
use mcp_types::ModelContextProtocolRequest;
|
||||
use mcp_types::RequestId;
|
||||
use pretty_assertions::assert_eq;
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
use tokio::time::timeout;
|
||||
|
||||
use crate::common::McpProcess;
|
||||
use crate::common::create_final_assistant_message_sse_response;
|
||||
use crate::common::create_mock_chat_completions_server;
|
||||
use crate::common::create_shell_sse_response;
|
||||
|
||||
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
|
||||
|
||||
/// Test that a shell command that is not on the "trusted" list triggers an
|
||||
/// elicitation request to the MCP and that sending the approval runs the
|
||||
/// command, as expected.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn test_shell_command_approval_triggers_elicitation() {
|
||||
if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
// Apparently `#[tokio::test]` must return `()`, so we create a helper
|
||||
// function that returns `Result` so we can use `?` in favor of `unwrap`.
|
||||
if let Err(err) = shell_command_approval_triggers_elicitation().await {
|
||||
panic!("failure: {err}");
|
||||
}
|
||||
}
|
||||
|
||||
async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
|
||||
// We use `git init` because it will not be on the "trusted" list.
|
||||
let shell_command = vec!["git".to_string(), "init".to_string()];
|
||||
let workdir_for_shell_function_call = TempDir::new()?;
|
||||
|
||||
// Configure the mock server so it makes two responses:
|
||||
// 1. The first response is a shell function call that will trigger an
|
||||
// elicitation request.
|
||||
// 2. The second response is the final assistant message that should be
|
||||
// returned after the elicitation is approved and the command is run.
|
||||
let server = create_mock_chat_completions_server(vec![
|
||||
create_shell_sse_response(
|
||||
shell_command.clone(),
|
||||
Some(workdir_for_shell_function_call.path()),
|
||||
Some(5_000),
|
||||
"call1234",
|
||||
)?,
|
||||
create_final_assistant_message_sse_response("Enjoy your new git repo!")?,
|
||||
])
|
||||
.await;
|
||||
|
||||
// Run `codex mcp` with a specific config.toml.
|
||||
let codex_home = TempDir::new()?;
|
||||
create_config_toml(codex_home.path(), server.uri())?;
|
||||
let mut mcp_process = McpProcess::new(codex_home.path()).await?;
|
||||
timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
|
||||
|
||||
// Send a "codex" tool request, which should hit the completions endpoint.
|
||||
// In turn, it should reply with a tool call, which the MCP should forward
|
||||
// as an elicitation.
|
||||
let codex_request_id = mcp_process.send_codex_tool_call("run `git init`").await?;
|
||||
let elicitation_request = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp_process.read_stream_until_request_message(),
|
||||
)
|
||||
.await??;
|
||||
|
||||
// This is the first request from the server, so the id should be 0 given
|
||||
// how things are currently implemented.
|
||||
let elicitation_request_id = RequestId::Integer(0);
|
||||
let expected_elicitation_request = create_expected_elicitation_request(
|
||||
elicitation_request_id.clone(),
|
||||
shell_command.clone(),
|
||||
workdir_for_shell_function_call.path(),
|
||||
codex_request_id.to_string(),
|
||||
// Internal Codex id: empirically it is 1, but this is
|
||||
// admittedly an internal detail that could change.
|
||||
"1".to_string(),
|
||||
)?;
|
||||
assert_eq!(expected_elicitation_request, elicitation_request);
|
||||
|
||||
// Accept the `git init` request by responding to the elicitation.
|
||||
mcp_process
|
||||
.send_response(
|
||||
elicitation_request_id,
|
||||
serde_json::to_value(ExecApprovalResponse {
|
||||
decision: ReviewDecision::Approved,
|
||||
})?,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Verify the original `codex` tool call completes and that `git init` ran
|
||||
// successfully.
|
||||
let codex_response = timeout(
|
||||
DEFAULT_READ_TIMEOUT,
|
||||
mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
|
||||
)
|
||||
.await??;
|
||||
assert_eq!(
|
||||
JSONRPCResponse {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: RequestId::Integer(codex_request_id),
|
||||
result: json!({
|
||||
"content": [
|
||||
{
|
||||
"text": "Enjoy your new git repo!",
|
||||
"type": "text"
|
||||
}
|
||||
]
|
||||
}),
|
||||
},
|
||||
codex_response
|
||||
);
|
||||
|
||||
assert!(
|
||||
workdir_for_shell_function_call.path().join(".git").is_dir(),
|
||||
".git folder should have been created"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Writes `config.toml` into `codex_home`, pointing Codex at the mock server
/// at `server_uri` as its model provider.
///
/// The config sets `approval_policy = "untrusted"` so that shell commands go
/// through the elicitation code path, and disables request and stream
/// retries.
fn create_config_toml(codex_home: &Path, server_uri: String) -> std::io::Result<()> {
    let contents = format!(
        r#"
model = "mock-model"
approval_policy = "untrusted"
sandbox_policy = "read-only"

model_provider = "mock_provider"

[model_providers.mock_provider]
name = "Mock provider for test"
base_url = "{server_uri}/v1"
wire_api = "chat"
request_max_retries = 0
stream_max_retries = 0
"#
    );

    let config_path = codex_home.join("config.toml");
    std::fs::write(config_path, contents)
}
|
||||
|
||||
fn create_expected_elicitation_request(
|
||||
elicitation_request_id: RequestId,
|
||||
command: Vec<String>,
|
||||
workdir: &Path,
|
||||
codex_mcp_tool_call_id: String,
|
||||
codex_event_id: String,
|
||||
) -> anyhow::Result<JSONRPCRequest> {
|
||||
let expected_message = format!(
|
||||
"Allow Codex to run `{}` in `{}`?",
|
||||
shlex::try_join(command.iter().map(|s| s.as_ref()))?,
|
||||
workdir.to_string_lossy()
|
||||
);
|
||||
Ok(JSONRPCRequest {
|
||||
jsonrpc: JSONRPC_VERSION.into(),
|
||||
id: elicitation_request_id,
|
||||
method: ElicitRequest::METHOD.to_string(),
|
||||
params: Some(serde_json::to_value(&ExecApprovalElicitRequestParams {
|
||||
message: expected_message,
|
||||
requested_schema: ElicitRequestParamsRequestedSchema {
|
||||
r#type: "object".to_string(),
|
||||
properties: json!({}),
|
||||
required: None,
|
||||
},
|
||||
codex_elicitation: "exec-approval".to_string(),
|
||||
codex_mcp_tool_call_id,
|
||||
codex_event_id,
|
||||
codex_command: command,
|
||||
codex_cwd: workdir.to_path_buf(),
|
||||
})?),
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user