From d49d802b06181eb7365df13acaf0482ebcb37b08 Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Mon, 21 Jul 2025 10:27:07 -0700 Subject: [PATCH] test: add integration test for MCP server (#1633) This PR introduces a single integration test for `cargo mcp`, though it also introduces a number of reusable components so that it should be easier to introduce more integration tests going forward. The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs` and the reusable pieces are in `codex-rs/mcp-server/tests/common`. The test itself verifies new functionality around elicitations introduced in https://github.com/openai/codex/pull/1623 (and the fix introduced in https://github.com/openai/codex/pull/1629) by doing the following: - starts a mock model provider with canned responses for `/v1/chat/completions` - starts the MCP server with a `config.toml` to use that model provider (and `approval_policy = "untrusted"`) - sends the `codex` tool call which causes the mock model provider to request a shell call for `git init` - the MCP server sends an elicitation to the client to approve the request - the client replies to the elicitation with `"approved"` - the MCP server runs the command and re-samples the model, getting a `"finish_reason": "stop"` - in turn, the MCP server sends the final response to the original `codex` tool call - verifies that `git init` ran as expected To test: ``` cargo test shell_command_approval_triggers_elicitation ``` In writing this test, I discovered that `ExecApprovalResponse` does not conform to `ElicitResult`, so I added a TODO to fix that, since I think that should be updated in a separate PR. As it stands, this PR does not update any business logic, though it does make a number of members of the `mcp-server` crate `pub` so they can be used in the test. One additional learning from this PR is that `std::process::Command::cargo_bin()` from the `assert_cmd` trait is only available for `std::process::Command`, but we really want to use `tokio::process::Command` so that everything is async and we can leverage utilities like `tokio::time::timeout()`. The trick I came up with was to use `cargo_bin()` to locate the program, and then to use `std::process::Command::get_program()` when constructing the `tokio::process::Command`. --- codex-rs/Cargo.lock | 4 + codex-rs/mcp-server/Cargo.toml | 4 + codex-rs/mcp-server/src/codex_tool_config.rs | 13 +- codex-rs/mcp-server/src/codex_tool_runner.rs | 28 +- codex-rs/mcp-server/src/lib.rs | 4 + codex-rs/mcp-server/src/message_processor.rs | 2 +- .../mcp-server/tests/common/mcp_process.rs | 255 ++++++++++++++++++ .../tests/common/mock_model_server.rs | 47 ++++ codex-rs/mcp-server/tests/common/mod.rs | 8 + codex-rs/mcp-server/tests/common/responses.rs | 59 ++++ codex-rs/mcp-server/tests/elicitation.rs | 195 ++++++++++++++ 11 files changed, 602 insertions(+), 17 deletions(-) create mode 100644 codex-rs/mcp-server/tests/common/mcp_process.rs create mode 100644 codex-rs/mcp-server/tests/common/mock_model_server.rs create mode 100644 codex-rs/mcp-server/tests/common/mod.rs create mode 100644 codex-rs/mcp-server/tests/common/responses.rs create mode 100644 codex-rs/mcp-server/tests/elicitation.rs diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 9171369a..6a8e76dd 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -792,6 +792,7 @@ name = "codex-mcp-server" version = "0.0.0" dependencies = [ "anyhow", + "assert_cmd", "codex-core", "codex-linux-sandbox", "mcp-types", @@ -800,10 +801,13 @@ dependencies = [ "serde", "serde_json", "shlex", + "tempfile", "tokio", + "tokio-test", "toml 0.9.1", "tracing", "tracing-subscriber", + "wiremock", ] [[package]] diff --git a/codex-rs/mcp-server/Cargo.toml b/codex-rs/mcp-server/Cargo.toml index 640a9993..f43b101b 100644 --- a/codex-rs/mcp-server/Cargo.toml +++ b/codex-rs/mcp-server/Cargo.toml @@ -35,4 +35,8 @@ tokio = { version = "1", features = [ ] } [dev-dependencies] +assert_cmd = "2" pretty_assertions = "1.4.1" +tempfile = "3" +tokio-test = "0.4" +wiremock = "0.6" diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs index f54d29dd..9a31dbcc 100644 --- a/codex-rs/mcp-server/src/codex_tool_config.rs +++ b/codex-rs/mcp-server/src/codex_tool_config.rs @@ -7,15 +7,16 @@ use mcp_types::ToolInputSchema; use schemars::JsonSchema; use schemars::r#gen::SchemaSettings; use serde::Deserialize; +use serde::Serialize; use std::collections::HashMap; use std::path::PathBuf; use crate::json_to_toml::json_to_toml; /// Client-supplied configuration for a `codex` tool-call. -#[derive(Debug, Clone, Deserialize, JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "kebab-case")] -pub(crate) struct CodexToolCallParam { +pub struct CodexToolCallParam { /// The *initial user prompt* to start the Codex conversation. pub prompt: String, @@ -49,9 +50,9 @@ pub(crate) struct CodexToolCallParam { /// Custom enum mirroring [`AskForApproval`], but has an extra dependency on /// [`JsonSchema`]. -#[derive(Debug, Clone, Deserialize, JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "kebab-case")] -pub(crate) enum CodexToolCallApprovalPolicy { +pub enum CodexToolCallApprovalPolicy { Untrusted, OnFailure, Never, @@ -69,9 +70,9 @@ impl From for AskForApproval { /// Custom enum mirroring [`SandboxMode`] from config_types.rs, but with /// `JsonSchema` support. -#[derive(Debug, Clone, Deserialize, JsonSchema)] +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] #[serde(rename_all = "kebab-case")] -pub(crate) enum CodexToolCallSandboxMode { +pub enum CodexToolCallSandboxMode { ReadOnly, WorkspaceWrite, DangerFullAccess, diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index 69a41bd6..163055de 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -100,7 +100,10 @@ pub async fn run_codex_tool_session( }) => { let escaped_command = shlex::try_join(command.iter().map(|s| s.as_str())) .unwrap_or_else(|_| command.join(" ")); - let message = format!("Allow Codex to run `{escaped_command}` in {cwd:?}?"); + let message = format!( + "Allow Codex to run `{escaped_command}` in `{cwd}`?", + cwd = cwd.to_string_lossy() + ); let params = ExecApprovalElicitRequestParams { message, @@ -276,7 +279,12 @@ async fn on_exec_approval_response( } } -#[derive(Debug, Deserialize)] +// TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See: +// - https://github.com/modelcontextprotocol/modelcontextprotocol/blob/f962dc1780fa5eed7fb7c8a0232f1fc83ef220cd/schema/2025-06-18/schema.json#L617-L636 +// - https://modelcontextprotocol.io/specification/draft/client/elicitation#protocol-messages +// It should have "action" and "content" fields. + +#[derive(Debug, Serialize, Deserialize)] pub struct ExecApprovalResponse { pub decision: ReviewDecision, } @@ -284,19 +292,19 @@ pub struct ExecApprovalResponse { /// Conforms to [`mcp_types::ElicitRequestParams`] so that it can be used as the /// `params` field of an [`mcp_types::ElicitRequest`]. #[derive(Debug, Serialize)] -struct ExecApprovalElicitRequestParams { +pub struct ExecApprovalElicitRequestParams { // These fields are required so that `params` // conforms to ElicitRequestParams. - message: String, + pub message: String, #[serde(rename = "requestedSchema")] - requested_schema: ElicitRequestParamsRequestedSchema, + pub requested_schema: ElicitRequestParamsRequestedSchema, // These are additional fields the client can use to // correlate the request with the codex tool call. - codex_elicitation: String, - codex_mcp_tool_call_id: String, - codex_event_id: String, - codex_command: Vec, - codex_cwd: PathBuf, + pub codex_elicitation: String, + pub codex_mcp_tool_call_id: String, + pub codex_event_id: String, + pub codex_command: Vec, + pub codex_cwd: PathBuf, } diff --git a/codex-rs/mcp-server/src/lib.rs b/codex-rs/mcp-server/src/lib.rs index 3b984ecf..1f1ecc3f 100644 --- a/codex-rs/mcp-server/src/lib.rs +++ b/codex-rs/mcp-server/src/lib.rs @@ -24,6 +24,10 @@ use crate::message_processor::MessageProcessor; use crate::outgoing_message::OutgoingMessage; use crate::outgoing_message::OutgoingMessageSender; +pub use crate::codex_tool_config::CodexToolCallParam; +pub use crate::codex_tool_runner::ExecApprovalElicitRequestParams; +pub use crate::codex_tool_runner::ExecApprovalResponse; + /// Size of the bounded channels used to communicate between tasks. The value /// is a balance between throughput and memory usage – 128 messages should be /// plenty for an interactive CLI. diff --git a/codex-rs/mcp-server/src/message_processor.rs b/codex-rs/mcp-server/src/message_processor.rs index a736c8c9..61c320ed 100644 --- a/codex-rs/mcp-server/src/message_processor.rs +++ b/codex-rs/mcp-server/src/message_processor.rs @@ -185,7 +185,7 @@ impl MessageProcessor { protocol_version: params.protocol_version.clone(), server_info: mcp_types::Implementation { name: "codex-mcp-server".to_string(), - version: mcp_types::MCP_SCHEMA_VERSION.to_string(), + version: env!("CARGO_PKG_VERSION").to_string(), title: Some("Codex".to_string()), }, }; diff --git a/codex-rs/mcp-server/tests/common/mcp_process.rs b/codex-rs/mcp-server/tests/common/mcp_process.rs new file mode 100644 index 00000000..42d15f78 --- /dev/null +++ b/codex-rs/mcp-server/tests/common/mcp_process.rs @@ -0,0 +1,255 @@ +use std::path::Path; +use std::process::Stdio; +use std::sync::atomic::AtomicI64; +use std::sync::atomic::Ordering; +use tokio::io::AsyncBufReadExt; +use tokio::io::AsyncWriteExt; +use tokio::io::BufReader; +use tokio::process::Child; +use tokio::process::ChildStdin; +use tokio::process::ChildStdout; + +use anyhow::Context; +use assert_cmd::prelude::*; +use codex_mcp_server::CodexToolCallParam; +use mcp_types::CallToolRequestParams; +use mcp_types::ClientCapabilities; +use mcp_types::Implementation; +use mcp_types::InitializeRequestParams; +use mcp_types::JSONRPC_VERSION; +use mcp_types::JSONRPCMessage; +use mcp_types::JSONRPCNotification; +use mcp_types::JSONRPCRequest; +use mcp_types::JSONRPCResponse; +use mcp_types::ModelContextProtocolNotification; +use mcp_types::ModelContextProtocolRequest; +use mcp_types::RequestId; +use pretty_assertions::assert_eq; +use serde_json::json; +use std::process::Command as StdCommand; +use tokio::process::Command; + +pub struct McpProcess { + next_request_id: AtomicI64, + /// Retain this child process until the client is dropped. The Tokio runtime + /// will make a "best effort" to reap the process after it exits, but it is + /// not a guarantee. See the `kill_on_drop` documentation for details. + #[allow(dead_code)] + process: Child, + stdin: ChildStdin, + stdout: BufReader, +} + +impl McpProcess { + pub async fn new(codex_home: &Path) -> anyhow::Result { + // Use assert_cmd to locate the binary path and then switch to tokio::process::Command + let std_cmd = StdCommand::cargo_bin("codex-mcp-server") + .context("should find binary for codex-mcp-server")?; + + let program = std_cmd.get_program().to_owned(); + + let mut cmd = Command::new(program); + + cmd.stdin(Stdio::piped()); + cmd.stdout(Stdio::piped()); + cmd.env("CODEX_HOME", codex_home); + cmd.env("RUST_LOG", "debug"); + + let mut process = cmd + .kill_on_drop(true) + .spawn() + .context("codex-mcp-server proc should start")?; + let stdin = process + .stdin + .take() + .ok_or_else(|| anyhow::format_err!("mcp should have stdin fd"))?; + let stdout = process + .stdout + .take() + .ok_or_else(|| anyhow::format_err!("mcp should have stdout fd"))?; + let stdout = BufReader::new(stdout); + Ok(Self { + next_request_id: AtomicI64::new(0), + process, + stdin, + stdout, + }) + } + + /// Performs the initialization handshake with the MCP server. + pub async fn initialize(&mut self) -> anyhow::Result<()> { + let request_id = self.next_request_id.fetch_add(1, Ordering::Relaxed); + + let params = InitializeRequestParams { + capabilities: ClientCapabilities { + elicitation: Some(json!({})), + experimental: None, + roots: None, + sampling: None, + }, + client_info: Implementation { + name: "elicitation test".into(), + title: Some("Elicitation Test".into()), + version: "0.0.0".into(), + }, + protocol_version: mcp_types::MCP_SCHEMA_VERSION.into(), + }; + let params_value = serde_json::to_value(params)?; + + self.send_jsonrpc_message(JSONRPCMessage::Request(JSONRPCRequest { + jsonrpc: JSONRPC_VERSION.into(), + id: RequestId::Integer(request_id), + method: mcp_types::InitializeRequest::METHOD.into(), + params: Some(params_value), + })) + .await?; + + let initialized = self.read_jsonrpc_message().await?; + assert_eq!( + JSONRPCMessage::Response(JSONRPCResponse { + jsonrpc: JSONRPC_VERSION.into(), + id: RequestId::Integer(request_id), + result: json!({ + "capabilities": { + "tools": { + "listChanged": true + }, + }, + "serverInfo": { + "name": "codex-mcp-server", + "title": "Codex", + "version": "0.0.0" + }, + "protocolVersion": mcp_types::MCP_SCHEMA_VERSION + }) + }), + initialized + ); + + // Send notifications/initialized to ack the response. + self.send_jsonrpc_message(JSONRPCMessage::Notification(JSONRPCNotification { + jsonrpc: JSONRPC_VERSION.into(), + method: mcp_types::InitializedNotification::METHOD.into(), + params: None, + })) + .await?; + + Ok(()) + } + + /// Returns the id used to make the request so it can be used when + /// correlating notifications. + pub async fn send_codex_tool_call(&mut self, prompt: &str) -> anyhow::Result { + let codex_tool_call_params = CallToolRequestParams { + name: "codex".to_string(), + arguments: Some(serde_json::to_value(CodexToolCallParam { + prompt: prompt.to_string(), + model: None, + profile: None, + cwd: None, + approval_policy: None, + sandbox: None, + config: None, + })?), + }; + self.send_request( + mcp_types::CallToolRequest::METHOD, + Some(serde_json::to_value(codex_tool_call_params)?), + ) + .await + } + + async fn send_request( + &mut self, + method: &str, + params: Option, + ) -> anyhow::Result { + let request_id = self.next_request_id.fetch_add(1, Ordering::Relaxed); + + let message = JSONRPCMessage::Request(JSONRPCRequest { + jsonrpc: JSONRPC_VERSION.into(), + id: RequestId::Integer(request_id), + method: method.to_string(), + params, + }); + self.send_jsonrpc_message(message).await?; + Ok(request_id) + } + + pub async fn send_response( + &mut self, + id: RequestId, + result: serde_json::Value, + ) -> anyhow::Result<()> { + self.send_jsonrpc_message(JSONRPCMessage::Response(JSONRPCResponse { + jsonrpc: JSONRPC_VERSION.into(), + id, + result, + })) + .await + } + + async fn send_jsonrpc_message(&mut self, message: JSONRPCMessage) -> anyhow::Result<()> { + let payload = serde_json::to_string(&message)?; + self.stdin.write_all(payload.as_bytes()).await?; + self.stdin.write_all(b"\n").await?; + self.stdin.flush().await?; + Ok(()) + } + + async fn read_jsonrpc_message(&mut self) -> anyhow::Result { + let mut line = String::new(); + self.stdout.read_line(&mut line).await?; + let message = serde_json::from_str::(&line)?; + Ok(message) + } + + pub async fn read_stream_until_request_message(&mut self) -> anyhow::Result { + loop { + let message = self.read_jsonrpc_message().await?; + eprint!("message: {message:?}"); + + match message { + JSONRPCMessage::Notification(_) => { + eprintln!("notification: {message:?}"); + } + JSONRPCMessage::Request(jsonrpc_request) => { + return Ok(jsonrpc_request); + } + JSONRPCMessage::Error(_) => { + anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}"); + } + JSONRPCMessage::Response(_) => { + anyhow::bail!("unexpected JSONRPCMessage::Response: {message:?}"); + } + } + } + } + + pub async fn read_stream_until_response_message( + &mut self, + request_id: RequestId, + ) -> anyhow::Result { + loop { + let message = self.read_jsonrpc_message().await?; + eprint!("message: {message:?}"); + + match message { + JSONRPCMessage::Notification(_) => { + eprintln!("notification: {message:?}"); + } + JSONRPCMessage::Request(_) => { + anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}"); + } + JSONRPCMessage::Error(_) => { + anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}"); + } + JSONRPCMessage::Response(jsonrpc_response) => { + if jsonrpc_response.id == request_id { + return Ok(jsonrpc_response); + } + } + } + } + } +} diff --git a/codex-rs/mcp-server/tests/common/mock_model_server.rs b/codex-rs/mcp-server/tests/common/mock_model_server.rs new file mode 100644 index 00000000..be7f3eb5 --- /dev/null +++ b/codex-rs/mcp-server/tests/common/mock_model_server.rs @@ -0,0 +1,47 @@ +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; + +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::Respond; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; + +/// Create a mock server that will provide the responses, in order, for +/// requests to the `/v1/chat/completions` endpoint. +pub async fn create_mock_chat_completions_server(responses: Vec) -> MockServer { + let server = MockServer::start().await; + + let num_calls = responses.len(); + let seq_responder = SeqResponder { + num_calls: AtomicUsize::new(0), + responses, + }; + + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with(seq_responder) + .expect(num_calls as u64) + .mount(&server) + .await; + + server +} + +struct SeqResponder { + num_calls: AtomicUsize, + responses: Vec, +} + +impl Respond for SeqResponder { + fn respond(&self, _: &wiremock::Request) -> ResponseTemplate { + let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst); + match self.responses.get(call_num) { + Some(response) => ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_raw(response.clone(), "text/event-stream"), + None => panic!("no response for {call_num}"), + } + } +} diff --git a/codex-rs/mcp-server/tests/common/mod.rs b/codex-rs/mcp-server/tests/common/mod.rs new file mode 100644 index 00000000..61a5774b --- /dev/null +++ b/codex-rs/mcp-server/tests/common/mod.rs @@ -0,0 +1,8 @@ +mod mcp_process; +mod mock_model_server; +mod responses; + +pub use mcp_process::McpProcess; +pub use mock_model_server::create_mock_chat_completions_server; +pub use responses::create_final_assistant_message_sse_response; +pub use responses::create_shell_sse_response; diff --git a/codex-rs/mcp-server/tests/common/responses.rs b/codex-rs/mcp-server/tests/common/responses.rs new file mode 100644 index 00000000..a11c72d0 --- /dev/null +++ b/codex-rs/mcp-server/tests/common/responses.rs @@ -0,0 +1,59 @@ +use serde_json::json; +use std::path::Path; + +pub fn create_shell_sse_response( + command: Vec, + workdir: Option<&Path>, + timeout_ms: Option, + call_id: &str, +) -> anyhow::Result { + // The `arguments`` for the `shell` tool is a serialized JSON object. + let tool_call_arguments = serde_json::to_string(&json!({ + "command": command, + "workdir": workdir.map(|w| w.to_string_lossy()), + "timeout": timeout_ms + }))?; + let tool_call = json!({ + "choices": [ + { + "delta": { + "tool_calls": [ + { + "id": call_id, + "function": { + "name": "shell", + "arguments": tool_call_arguments + } + } + ] + }, + "finish_reason": "tool_calls" + } + ] + }); + + let sse = format!( + "data: {}\n\ndata: DONE\n\n", + serde_json::to_string(&tool_call)? + ); + Ok(sse) +} + +pub fn create_final_assistant_message_sse_response(message: &str) -> anyhow::Result { + let assistant_message = json!({ + "choices": [ + { + "delta": { + "content": message + }, + "finish_reason": "stop" + } + ] + }); + + let sse = format!( + "data: {}\n\ndata: DONE\n\n", + serde_json::to_string(&assistant_message)? + ); + Ok(sse) +} diff --git a/codex-rs/mcp-server/tests/elicitation.rs b/codex-rs/mcp-server/tests/elicitation.rs new file mode 100644 index 00000000..7fd68d67 --- /dev/null +++ b/codex-rs/mcp-server/tests/elicitation.rs @@ -0,0 +1,195 @@ +mod common; + +use std::path::Path; + +use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; +use codex_core::protocol::ReviewDecision; +use codex_mcp_server::ExecApprovalElicitRequestParams; +use codex_mcp_server::ExecApprovalResponse; +use mcp_types::ElicitRequest; +use mcp_types::ElicitRequestParamsRequestedSchema; +use mcp_types::JSONRPC_VERSION; +use mcp_types::JSONRPCRequest; +use mcp_types::JSONRPCResponse; +use mcp_types::ModelContextProtocolRequest; +use mcp_types::RequestId; +use pretty_assertions::assert_eq; +use serde_json::json; +use tempfile::TempDir; +use tokio::time::timeout; + +use crate::common::McpProcess; +use crate::common::create_final_assistant_message_sse_response; +use crate::common::create_mock_chat_completions_server; +use crate::common::create_shell_sse_response; + +const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10); + +/// Test that a shell command that is not on the "trusted" list triggers an +/// elicitation request to the MCP and that sending the approval runs the +/// command, as expected. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_shell_command_approval_triggers_elicitation() { + if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() { + println!( + "Skipping test because it cannot execute when network is disabled in a Codex sandbox." + ); + return; + } + + // Apparently `#[tokio::test]` must return `()`, so we create a helper + // function that returns `Result` so we can use `?` in favor of `unwrap`. + if let Err(err) = shell_command_approval_triggers_elicitation().await { + panic!("failure: {err}"); + } +} + +async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> { + // We use `git init` because it will not be on the "trusted" list. + let shell_command = vec!["git".to_string(), "init".to_string()]; + let workdir_for_shell_function_call = TempDir::new()?; + + // Configure the mock server so it makes two responses: + // 1. The first response is a shell function call that will trigger an + // elicitation request. + // 2. The second response is the final assistant message that should be + // returned after the elicitation is approved and the command is run. + let server = create_mock_chat_completions_server(vec![ + create_shell_sse_response( + shell_command.clone(), + Some(workdir_for_shell_function_call.path()), + Some(5_000), + "call1234", + )?, + create_final_assistant_message_sse_response("Enjoy your new git repo!")?, + ]) + .await; + + // Run `codex mcp` with a specific config.toml. + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), server.uri())?; + let mut mcp_process = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??; + + // Send a "codex" tool request, which should hit the completions endpoint. + // In turn, it should reply with a tool call, which the MCP should forward + // as an elicitation. + let codex_request_id = mcp_process.send_codex_tool_call("run `git init`").await?; + let elicitation_request = timeout( + DEFAULT_READ_TIMEOUT, + mcp_process.read_stream_until_request_message(), + ) + .await??; + + // This is the first request from the server, so the id should be 0 given + // how things are currently implemented. + let elicitation_request_id = RequestId::Integer(0); + let expected_elicitation_request = create_expected_elicitation_request( + elicitation_request_id.clone(), + shell_command.clone(), + workdir_for_shell_function_call.path(), + codex_request_id.to_string(), + // Internal Codex id: empirically it is 1, but this is + // admittedly an internal detail that could change. + "1".to_string(), + )?; + assert_eq!(expected_elicitation_request, elicitation_request); + + // Accept the `git init` request by responding to the elicitation. + mcp_process + .send_response( + elicitation_request_id, + serde_json::to_value(ExecApprovalResponse { + decision: ReviewDecision::Approved, + })?, + ) + .await?; + + // Verify the original `codex` tool call completes and that `git init` ran + // successfully. + let codex_response = timeout( + DEFAULT_READ_TIMEOUT, + mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)), + ) + .await??; + assert_eq!( + JSONRPCResponse { + jsonrpc: JSONRPC_VERSION.into(), + id: RequestId::Integer(codex_request_id), + result: json!({ + "content": [ + { + "text": "Enjoy your new git repo!", + "type": "text" + } + ] + }), + }, + codex_response + ); + + assert!( + workdir_for_shell_function_call.path().join(".git").is_dir(), + ".git folder should have been created" + ); + + Ok(()) +} + +/// Create a Codex config that uses the mock server as the model provider. +/// It also uses `approval_policy = "untrusted"` so that we exercise the +/// elicitation code path for shell commands. +fn create_config_toml(codex_home: &Path, server_uri: String) -> std::io::Result<()> { + let config_toml = codex_home.join("config.toml"); + std::fs::write( + config_toml, + format!( + r#" +model = "mock-model" +approval_policy = "untrusted" +sandbox_policy = "read-only" + +model_provider = "mock_provider" + +[model_providers.mock_provider] +name = "Mock provider for test" +base_url = "{server_uri}/v1" +wire_api = "chat" +request_max_retries = 0 +stream_max_retries = 0 +"# + ), + ) +} + +fn create_expected_elicitation_request( + elicitation_request_id: RequestId, + command: Vec, + workdir: &Path, + codex_mcp_tool_call_id: String, + codex_event_id: String, +) -> anyhow::Result { + let expected_message = format!( + "Allow Codex to run `{}` in `{}`?", + shlex::try_join(command.iter().map(|s| s.as_ref()))?, + workdir.to_string_lossy() + ); + Ok(JSONRPCRequest { + jsonrpc: JSONRPC_VERSION.into(), + id: elicitation_request_id, + method: ElicitRequest::METHOD.to_string(), + params: Some(serde_json::to_value(&ExecApprovalElicitRequestParams { + message: expected_message, + requested_schema: ElicitRequestParamsRequestedSchema { + r#type: "object".to_string(), + properties: json!({}), + required: None, + }, + codex_elicitation: "exec-approval".to_string(), + codex_mcp_tool_call_id, + codex_event_id, + codex_command: command, + codex_cwd: workdir.to_path_buf(), + })?), + }) +}