From d49d802b06181eb7365df13acaf0482ebcb37b08 Mon Sep 17 00:00:00 2001
From: Michael Bolin <mbolin@openai.com>
Date: Mon, 21 Jul 2025 10:27:07 -0700
Subject: [PATCH] test: add integration test for MCP server (#1633)

This PR introduces a single integration test for `cargo mcp`, though it
also introduces a number of reusable components so that it should be
easier to introduce more integration tests going forward.

The new test is introduced in `codex-rs/mcp-server/tests/elicitation.rs`
and the reusable pieces are in `codex-rs/mcp-server/tests/common`.

The test itself verifies new functionality around elicitations
introduced in https://github.com/openai/codex/pull/1623 (and the fix
introduced in https://github.com/openai/codex/pull/1629) by doing the
following:

- starts a mock model provider with canned responses for
`/v1/chat/completions`
- starts the MCP server with a `config.toml` to use that model provider
(and `approval_policy = "untrusted"`)
- sends the `codex` tool call which causes the mock model provider to
request a shell call for `git init`
- the MCP server sends an elicitation to the client to approve the
request
- the client replies to the elicitation with `"approved"`
- the MCP server runs the command and re-samples the model, getting a
`"finish_reason": "stop"`
- in turn, the MCP server sends the final response to the original
`codex` tool call
- verifies that `git init` ran as expected

To test:

```
cargo test shell_command_approval_triggers_elicitation
```

In writing this test, I discovered that `ExecApprovalResponse` does not
conform to `ElicitResult`, so I added a TODO to fix that, since I think
that should be updated in a separate PR. As it stands, this PR does not
update any business logic, though it does make a number of members of
the `mcp-server` crate `pub` so they can be used in the test.

One additional learning from this PR is that
`std::process::Command::cargo_bin()` from the `assert_cmd` trait is only
available for `std::process::Command`, but we really want to use
`tokio::process::Command` so that everything is async and we can
leverage utilities like `tokio::time::timeout()`. The trick I came up
with was to use `cargo_bin()` to locate the program, and then to use
`std::process::Command::get_program()` when constructing the
`tokio::process::Command`.
---
 codex-rs/Cargo.lock                           |   4 +
 codex-rs/mcp-server/Cargo.toml                |   4 +
 codex-rs/mcp-server/src/codex_tool_config.rs  |  13 +-
 codex-rs/mcp-server/src/codex_tool_runner.rs  |  28 +-
 codex-rs/mcp-server/src/lib.rs                |   4 +
 codex-rs/mcp-server/src/message_processor.rs  |   2 +-
 .../mcp-server/tests/common/mcp_process.rs    | 255 ++++++++++++++++++
 .../tests/common/mock_model_server.rs         |  47 ++++
 codex-rs/mcp-server/tests/common/mod.rs       |   8 +
 codex-rs/mcp-server/tests/common/responses.rs |  59 ++++
 codex-rs/mcp-server/tests/elicitation.rs      | 195 ++++++++++++++
 11 files changed, 602 insertions(+), 17 deletions(-)
 create mode 100644 codex-rs/mcp-server/tests/common/mcp_process.rs
 create mode 100644 codex-rs/mcp-server/tests/common/mock_model_server.rs
 create mode 100644 codex-rs/mcp-server/tests/common/mod.rs
 create mode 100644 codex-rs/mcp-server/tests/common/responses.rs
 create mode 100644 codex-rs/mcp-server/tests/elicitation.rs

diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index 9171369a..6a8e76dd 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -792,6 +792,7 @@ name = "codex-mcp-server"
 version = "0.0.0"
 dependencies = [
  "anyhow",
+ "assert_cmd",
  "codex-core",
  "codex-linux-sandbox",
  "mcp-types",
@@ -800,10 +801,13 @@ dependencies = [
  "serde",
  "serde_json",
  "shlex",
+ "tempfile",
  "tokio",
+ "tokio-test",
  "toml 0.9.1",
  "tracing",
  "tracing-subscriber",
+ "wiremock",
 ]
 
 [[package]]
diff --git a/codex-rs/mcp-server/Cargo.toml b/codex-rs/mcp-server/Cargo.toml
index 640a9993..f43b101b 100644
--- a/codex-rs/mcp-server/Cargo.toml
+++ b/codex-rs/mcp-server/Cargo.toml
@@ -35,4 +35,8 @@ tokio = { version = "1", features = [
 ] }
 
 [dev-dependencies]
+assert_cmd = "2"
 pretty_assertions = "1.4.1"
+tempfile = "3"
+tokio-test = "0.4"
+wiremock = "0.6"
diff --git a/codex-rs/mcp-server/src/codex_tool_config.rs b/codex-rs/mcp-server/src/codex_tool_config.rs
index f54d29dd..9a31dbcc 100644
--- a/codex-rs/mcp-server/src/codex_tool_config.rs
+++ b/codex-rs/mcp-server/src/codex_tool_config.rs
@@ -7,15 +7,16 @@ use mcp_types::ToolInputSchema;
 use schemars::JsonSchema;
 use schemars::r#gen::SchemaSettings;
 use serde::Deserialize;
+use serde::Serialize;
 use std::collections::HashMap;
 use std::path::PathBuf;
 
 use crate::json_to_toml::json_to_toml;
 
 /// Client-supplied configuration for a `codex` tool-call.
-#[derive(Debug, Clone, Deserialize, JsonSchema)]
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "kebab-case")]
-pub(crate) struct CodexToolCallParam {
+pub struct CodexToolCallParam {
     /// The *initial user prompt* to start the Codex conversation.
     pub prompt: String,
 
@@ -49,9 +50,9 @@ pub(crate) struct CodexToolCallParam {
 
 /// Custom enum mirroring [`AskForApproval`], but has an extra dependency on
 /// [`JsonSchema`].
-#[derive(Debug, Clone, Deserialize, JsonSchema)]
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "kebab-case")]
-pub(crate) enum CodexToolCallApprovalPolicy {
+pub enum CodexToolCallApprovalPolicy {
     Untrusted,
     OnFailure,
     Never,
@@ -69,9 +70,9 @@ impl From<CodexToolCallApprovalPolicy> for AskForApproval {
 
 /// Custom enum mirroring [`SandboxMode`] from config_types.rs, but with
 /// `JsonSchema` support.
-#[derive(Debug, Clone, Deserialize, JsonSchema)]
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
 #[serde(rename_all = "kebab-case")]
-pub(crate) enum CodexToolCallSandboxMode {
+pub enum CodexToolCallSandboxMode {
     ReadOnly,
     WorkspaceWrite,
     DangerFullAccess,
diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs
index 69a41bd6..163055de 100644
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -100,7 +100,10 @@ pub async fn run_codex_tool_session(
                     }) => {
                         let escaped_command = shlex::try_join(command.iter().map(|s| s.as_str()))
                             .unwrap_or_else(|_| command.join(" "));
-                        let message = format!("Allow Codex to run `{escaped_command}` in {cwd:?}?");
+                        let message = format!(
+                            "Allow Codex to run `{escaped_command}` in `{cwd}`?",
+                            cwd = cwd.to_string_lossy()
+                        );
 
                         let params = ExecApprovalElicitRequestParams {
                             message,
@@ -276,7 +279,12 @@ async fn on_exec_approval_response(
     }
 }
 
-#[derive(Debug, Deserialize)]
+// TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See:
+// - https://github.com/modelcontextprotocol/modelcontextprotocol/blob/f962dc1780fa5eed7fb7c8a0232f1fc83ef220cd/schema/2025-06-18/schema.json#L617-L636
+// - https://modelcontextprotocol.io/specification/draft/client/elicitation#protocol-messages
+// It should have "action" and "content" fields.
+
+#[derive(Debug, Serialize, Deserialize)]
 pub struct ExecApprovalResponse {
     pub decision: ReviewDecision,
 }
@@ -284,19 +292,19 @@ pub struct ExecApprovalResponse {
 /// Conforms to [`mcp_types::ElicitRequestParams`] so that it can be used as the
 /// `params` field of an [`mcp_types::ElicitRequest`].
 #[derive(Debug, Serialize)]
-struct ExecApprovalElicitRequestParams {
+pub struct ExecApprovalElicitRequestParams {
     // These fields are required so that `params`
     // conforms to ElicitRequestParams.
-    message: String,
+    pub message: String,
 
     #[serde(rename = "requestedSchema")]
-    requested_schema: ElicitRequestParamsRequestedSchema,
+    pub requested_schema: ElicitRequestParamsRequestedSchema,
 
     // These are additional fields the client can use to
     // correlate the request with the codex tool call.
-    codex_elicitation: String,
-    codex_mcp_tool_call_id: String,
-    codex_event_id: String,
-    codex_command: Vec<String>,
-    codex_cwd: PathBuf,
+    pub codex_elicitation: String,
+    pub codex_mcp_tool_call_id: String,
+    pub codex_event_id: String,
+    pub codex_command: Vec<String>,
+    pub codex_cwd: PathBuf,
 }
diff --git a/codex-rs/mcp-server/src/lib.rs b/codex-rs/mcp-server/src/lib.rs
index 3b984ecf..1f1ecc3f 100644
--- a/codex-rs/mcp-server/src/lib.rs
+++ b/codex-rs/mcp-server/src/lib.rs
@@ -24,6 +24,10 @@ use crate::message_processor::MessageProcessor;
 use crate::outgoing_message::OutgoingMessage;
 use crate::outgoing_message::OutgoingMessageSender;
 
+pub use crate::codex_tool_config::CodexToolCallParam;
+pub use crate::codex_tool_runner::ExecApprovalElicitRequestParams;
+pub use crate::codex_tool_runner::ExecApprovalResponse;
+
 /// Size of the bounded channels used to communicate between tasks. The value
 /// is a balance between throughput and memory usage – 128 messages should be
 /// plenty for an interactive CLI.
diff --git a/codex-rs/mcp-server/src/message_processor.rs b/codex-rs/mcp-server/src/message_processor.rs
index a736c8c9..61c320ed 100644
--- a/codex-rs/mcp-server/src/message_processor.rs
+++ b/codex-rs/mcp-server/src/message_processor.rs
@@ -185,7 +185,7 @@ impl MessageProcessor {
             protocol_version: params.protocol_version.clone(),
             server_info: mcp_types::Implementation {
                 name: "codex-mcp-server".to_string(),
-                version: mcp_types::MCP_SCHEMA_VERSION.to_string(),
+                version: env!("CARGO_PKG_VERSION").to_string(),
                 title: Some("Codex".to_string()),
             },
         };
diff --git a/codex-rs/mcp-server/tests/common/mcp_process.rs b/codex-rs/mcp-server/tests/common/mcp_process.rs
new file mode 100644
index 00000000..42d15f78
--- /dev/null
+++ b/codex-rs/mcp-server/tests/common/mcp_process.rs
@@ -0,0 +1,255 @@
+use std::path::Path;
+use std::process::Stdio;
+use std::sync::atomic::AtomicI64;
+use std::sync::atomic::Ordering;
+use tokio::io::AsyncBufReadExt;
+use tokio::io::AsyncWriteExt;
+use tokio::io::BufReader;
+use tokio::process::Child;
+use tokio::process::ChildStdin;
+use tokio::process::ChildStdout;
+
+use anyhow::Context;
+use assert_cmd::prelude::*;
+use codex_mcp_server::CodexToolCallParam;
+use mcp_types::CallToolRequestParams;
+use mcp_types::ClientCapabilities;
+use mcp_types::Implementation;
+use mcp_types::InitializeRequestParams;
+use mcp_types::JSONRPC_VERSION;
+use mcp_types::JSONRPCMessage;
+use mcp_types::JSONRPCNotification;
+use mcp_types::JSONRPCRequest;
+use mcp_types::JSONRPCResponse;
+use mcp_types::ModelContextProtocolNotification;
+use mcp_types::ModelContextProtocolRequest;
+use mcp_types::RequestId;
+use pretty_assertions::assert_eq;
+use serde_json::json;
+use std::process::Command as StdCommand;
+use tokio::process::Command;
+
+pub struct McpProcess {
+    next_request_id: AtomicI64,
+    /// Retain this child process until the client is dropped. The Tokio runtime
+    /// will make a "best effort" to reap the process after it exits, but it is
+    /// not a guarantee. See the `kill_on_drop` documentation for details.
+    #[allow(dead_code)]
+    process: Child,
+    stdin: ChildStdin,
+    stdout: BufReader<ChildStdout>,
+}
+
+impl McpProcess {
+    pub async fn new(codex_home: &Path) -> anyhow::Result<Self> {
+        // Use assert_cmd to locate the binary path and then switch to tokio::process::Command
+        let std_cmd = StdCommand::cargo_bin("codex-mcp-server")
+            .context("should find binary for codex-mcp-server")?;
+
+        let program = std_cmd.get_program().to_owned();
+
+        let mut cmd = Command::new(program);
+
+        cmd.stdin(Stdio::piped());
+        cmd.stdout(Stdio::piped());
+        cmd.env("CODEX_HOME", codex_home);
+        cmd.env("RUST_LOG", "debug");
+
+        let mut process = cmd
+            .kill_on_drop(true)
+            .spawn()
+            .context("codex-mcp-server proc should start")?;
+        let stdin = process
+            .stdin
+            .take()
+            .ok_or_else(|| anyhow::format_err!("mcp should have stdin fd"))?;
+        let stdout = process
+            .stdout
+            .take()
+            .ok_or_else(|| anyhow::format_err!("mcp should have stdout fd"))?;
+        let stdout = BufReader::new(stdout);
+        Ok(Self {
+            next_request_id: AtomicI64::new(0),
+            process,
+            stdin,
+            stdout,
+        })
+    }
+
+    /// Performs the initialization handshake with the MCP server.
+    pub async fn initialize(&mut self) -> anyhow::Result<()> {
+        let request_id = self.next_request_id.fetch_add(1, Ordering::Relaxed);
+
+        let params = InitializeRequestParams {
+            capabilities: ClientCapabilities {
+                elicitation: Some(json!({})),
+                experimental: None,
+                roots: None,
+                sampling: None,
+            },
+            client_info: Implementation {
+                name: "elicitation test".into(),
+                title: Some("Elicitation Test".into()),
+                version: "0.0.0".into(),
+            },
+            protocol_version: mcp_types::MCP_SCHEMA_VERSION.into(),
+        };
+        let params_value = serde_json::to_value(params)?;
+
+        self.send_jsonrpc_message(JSONRPCMessage::Request(JSONRPCRequest {
+            jsonrpc: JSONRPC_VERSION.into(),
+            id: RequestId::Integer(request_id),
+            method: mcp_types::InitializeRequest::METHOD.into(),
+            params: Some(params_value),
+        }))
+        .await?;
+
+        let initialized = self.read_jsonrpc_message().await?;
+        assert_eq!(
+            JSONRPCMessage::Response(JSONRPCResponse {
+                jsonrpc: JSONRPC_VERSION.into(),
+                id: RequestId::Integer(request_id),
+                result: json!({
+                    "capabilities": {
+                        "tools": {
+                            "listChanged": true
+                        },
+                    },
+                    "serverInfo": {
+                        "name": "codex-mcp-server",
+                        "title": "Codex",
+                        "version": "0.0.0"
+                    },
+                    "protocolVersion": mcp_types::MCP_SCHEMA_VERSION
+                })
+            }),
+            initialized
+        );
+
+        // Send notifications/initialized to ack the response.
+        self.send_jsonrpc_message(JSONRPCMessage::Notification(JSONRPCNotification {
+            jsonrpc: JSONRPC_VERSION.into(),
+            method: mcp_types::InitializedNotification::METHOD.into(),
+            params: None,
+        }))
+        .await?;
+
+        Ok(())
+    }
+
+    /// Returns the id used to make the request so it can be used when
+    /// correlating notifications.
+    pub async fn send_codex_tool_call(&mut self, prompt: &str) -> anyhow::Result<i64> {
+        let codex_tool_call_params = CallToolRequestParams {
+            name: "codex".to_string(),
+            arguments: Some(serde_json::to_value(CodexToolCallParam {
+                prompt: prompt.to_string(),
+                model: None,
+                profile: None,
+                cwd: None,
+                approval_policy: None,
+                sandbox: None,
+                config: None,
+            })?),
+        };
+        self.send_request(
+            mcp_types::CallToolRequest::METHOD,
+            Some(serde_json::to_value(codex_tool_call_params)?),
+        )
+        .await
+    }
+
+    async fn send_request(
+        &mut self,
+        method: &str,
+        params: Option<serde_json::Value>,
+    ) -> anyhow::Result<i64> {
+        let request_id = self.next_request_id.fetch_add(1, Ordering::Relaxed);
+
+        let message = JSONRPCMessage::Request(JSONRPCRequest {
+            jsonrpc: JSONRPC_VERSION.into(),
+            id: RequestId::Integer(request_id),
+            method: method.to_string(),
+            params,
+        });
+        self.send_jsonrpc_message(message).await?;
+        Ok(request_id)
+    }
+
+    pub async fn send_response(
+        &mut self,
+        id: RequestId,
+        result: serde_json::Value,
+    ) -> anyhow::Result<()> {
+        self.send_jsonrpc_message(JSONRPCMessage::Response(JSONRPCResponse {
+            jsonrpc: JSONRPC_VERSION.into(),
+            id,
+            result,
+        }))
+        .await
+    }
+
+    async fn send_jsonrpc_message(&mut self, message: JSONRPCMessage) -> anyhow::Result<()> {
+        let payload = serde_json::to_string(&message)?;
+        self.stdin.write_all(payload.as_bytes()).await?;
+        self.stdin.write_all(b"\n").await?;
+        self.stdin.flush().await?;
+        Ok(())
+    }
+
+    async fn read_jsonrpc_message(&mut self) -> anyhow::Result<JSONRPCMessage> {
+        let mut line = String::new();
+        self.stdout.read_line(&mut line).await?;
+        let message = serde_json::from_str::<JSONRPCMessage>(&line)?;
+        Ok(message)
+    }
+
+    pub async fn read_stream_until_request_message(&mut self) -> anyhow::Result<JSONRPCRequest> {
+        loop {
+            let message = self.read_jsonrpc_message().await?;
+            eprint!("message: {message:?}");
+
+            match message {
+                JSONRPCMessage::Notification(_) => {
+                    eprintln!("notification: {message:?}");
+                }
+                JSONRPCMessage::Request(jsonrpc_request) => {
+                    return Ok(jsonrpc_request);
+                }
+                JSONRPCMessage::Error(_) => {
+                    anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
+                }
+                JSONRPCMessage::Response(_) => {
+                    anyhow::bail!("unexpected JSONRPCMessage::Response: {message:?}");
+                }
+            }
+        }
+    }
+
+    pub async fn read_stream_until_response_message(
+        &mut self,
+        request_id: RequestId,
+    ) -> anyhow::Result<JSONRPCResponse> {
+        loop {
+            let message = self.read_jsonrpc_message().await?;
+            eprint!("message: {message:?}");
+
+            match message {
+                JSONRPCMessage::Notification(_) => {
+                    eprintln!("notification: {message:?}");
+                }
+                JSONRPCMessage::Request(_) => {
+                    anyhow::bail!("unexpected JSONRPCMessage::Request: {message:?}");
+                }
+                JSONRPCMessage::Error(_) => {
+                    anyhow::bail!("unexpected JSONRPCMessage::Error: {message:?}");
+                }
+                JSONRPCMessage::Response(jsonrpc_response) => {
+                    if jsonrpc_response.id == request_id {
+                        return Ok(jsonrpc_response);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/codex-rs/mcp-server/tests/common/mock_model_server.rs b/codex-rs/mcp-server/tests/common/mock_model_server.rs
new file mode 100644
index 00000000..be7f3eb5
--- /dev/null
+++ b/codex-rs/mcp-server/tests/common/mock_model_server.rs
@@ -0,0 +1,47 @@
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering;
+
+use wiremock::Mock;
+use wiremock::MockServer;
+use wiremock::Respond;
+use wiremock::ResponseTemplate;
+use wiremock::matchers::method;
+use wiremock::matchers::path;
+
+/// Create a mock server that will provide the responses, in order, for
+/// requests to the `/v1/chat/completions` endpoint.
+pub async fn create_mock_chat_completions_server(responses: Vec<String>) -> MockServer {
+    let server = MockServer::start().await;
+
+    let num_calls = responses.len();
+    let seq_responder = SeqResponder {
+        num_calls: AtomicUsize::new(0),
+        responses,
+    };
+
+    Mock::given(method("POST"))
+        .and(path("/v1/chat/completions"))
+        .respond_with(seq_responder)
+        .expect(num_calls as u64)
+        .mount(&server)
+        .await;
+
+    server
+}
+
+struct SeqResponder {
+    num_calls: AtomicUsize,
+    responses: Vec<String>,
+}
+
+impl Respond for SeqResponder {
+    fn respond(&self, _: &wiremock::Request) -> ResponseTemplate {
+        let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
+        match self.responses.get(call_num) {
+            Some(response) => ResponseTemplate::new(200)
+                .insert_header("content-type", "text/event-stream")
+                .set_body_raw(response.clone(), "text/event-stream"),
+            None => panic!("no response for {call_num}"),
+        }
+    }
+}
diff --git a/codex-rs/mcp-server/tests/common/mod.rs b/codex-rs/mcp-server/tests/common/mod.rs
new file mode 100644
index 00000000..61a5774b
--- /dev/null
+++ b/codex-rs/mcp-server/tests/common/mod.rs
@@ -0,0 +1,8 @@
+mod mcp_process;
+mod mock_model_server;
+mod responses;
+
+pub use mcp_process::McpProcess;
+pub use mock_model_server::create_mock_chat_completions_server;
+pub use responses::create_final_assistant_message_sse_response;
+pub use responses::create_shell_sse_response;
diff --git a/codex-rs/mcp-server/tests/common/responses.rs b/codex-rs/mcp-server/tests/common/responses.rs
new file mode 100644
index 00000000..a11c72d0
--- /dev/null
+++ b/codex-rs/mcp-server/tests/common/responses.rs
@@ -0,0 +1,59 @@
+use serde_json::json;
+use std::path::Path;
+
+pub fn create_shell_sse_response(
+    command: Vec<String>,
+    workdir: Option<&Path>,
+    timeout_ms: Option<u64>,
+    call_id: &str,
+) -> anyhow::Result<String> {
+    // The `arguments`` for the `shell` tool is a serialized JSON object.
+    let tool_call_arguments = serde_json::to_string(&json!({
+        "command": command,
+        "workdir": workdir.map(|w| w.to_string_lossy()),
+        "timeout": timeout_ms
+    }))?;
+    let tool_call = json!({
+        "choices": [
+            {
+                "delta": {
+                    "tool_calls": [
+                        {
+                            "id": call_id,
+                            "function": {
+                                "name": "shell",
+                                "arguments": tool_call_arguments
+                            }
+                        }
+                    ]
+                },
+                "finish_reason": "tool_calls"
+            }
+        ]
+    });
+
+    let sse = format!(
+        "data: {}\n\ndata: DONE\n\n",
+        serde_json::to_string(&tool_call)?
+    );
+    Ok(sse)
+}
+
+pub fn create_final_assistant_message_sse_response(message: &str) -> anyhow::Result<String> {
+    let assistant_message = json!({
+        "choices": [
+            {
+                "delta": {
+                    "content": message
+                },
+                "finish_reason": "stop"
+            }
+        ]
+    });
+
+    let sse = format!(
+        "data: {}\n\ndata: DONE\n\n",
+        serde_json::to_string(&assistant_message)?
+    );
+    Ok(sse)
+}
diff --git a/codex-rs/mcp-server/tests/elicitation.rs b/codex-rs/mcp-server/tests/elicitation.rs
new file mode 100644
index 00000000..7fd68d67
--- /dev/null
+++ b/codex-rs/mcp-server/tests/elicitation.rs
@@ -0,0 +1,195 @@
+mod common;
+
+use std::path::Path;
+
+use codex_core::exec::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR;
+use codex_core::protocol::ReviewDecision;
+use codex_mcp_server::ExecApprovalElicitRequestParams;
+use codex_mcp_server::ExecApprovalResponse;
+use mcp_types::ElicitRequest;
+use mcp_types::ElicitRequestParamsRequestedSchema;
+use mcp_types::JSONRPC_VERSION;
+use mcp_types::JSONRPCRequest;
+use mcp_types::JSONRPCResponse;
+use mcp_types::ModelContextProtocolRequest;
+use mcp_types::RequestId;
+use pretty_assertions::assert_eq;
+use serde_json::json;
+use tempfile::TempDir;
+use tokio::time::timeout;
+
+use crate::common::McpProcess;
+use crate::common::create_final_assistant_message_sse_response;
+use crate::common::create_mock_chat_completions_server;
+use crate::common::create_shell_sse_response;
+
+const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
+
+/// Test that a shell command that is not on the "trusted" list triggers an
+/// elicitation request to the MCP and that sending the approval runs the
+/// command, as expected.
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn test_shell_command_approval_triggers_elicitation() {
+    if std::env::var(CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR).is_ok() {
+        println!(
+            "Skipping test because it cannot execute when network is disabled in a Codex sandbox."
+        );
+        return;
+    }
+
+    // Apparently `#[tokio::test]` must return `()`, so we create a helper
+    // function that returns `Result` so we can use `?` in favor of `unwrap`.
+    if let Err(err) = shell_command_approval_triggers_elicitation().await {
+        panic!("failure: {err}");
+    }
+}
+
+async fn shell_command_approval_triggers_elicitation() -> anyhow::Result<()> {
+    // We use `git init` because it will not be on the "trusted" list.
+    let shell_command = vec!["git".to_string(), "init".to_string()];
+    let workdir_for_shell_function_call = TempDir::new()?;
+
+    // Configure the mock server so it makes two responses:
+    // 1. The first response is a shell function call that will trigger an
+    //    elicitation request.
+    // 2. The second response is the final assistant message that should be
+    //    returned after the elicitation is approved and the command is run.
+    let server = create_mock_chat_completions_server(vec![
+        create_shell_sse_response(
+            shell_command.clone(),
+            Some(workdir_for_shell_function_call.path()),
+            Some(5_000),
+            "call1234",
+        )?,
+        create_final_assistant_message_sse_response("Enjoy your new git repo!")?,
+    ])
+    .await;
+
+    // Run `codex mcp` with a specific config.toml.
+    let codex_home = TempDir::new()?;
+    create_config_toml(codex_home.path(), server.uri())?;
+    let mut mcp_process = McpProcess::new(codex_home.path()).await?;
+    timeout(DEFAULT_READ_TIMEOUT, mcp_process.initialize()).await??;
+
+    // Send a "codex" tool request, which should hit the completions endpoint.
+    // In turn, it should reply with a tool call, which the MCP should forward
+    // as an elicitation.
+    let codex_request_id = mcp_process.send_codex_tool_call("run `git init`").await?;
+    let elicitation_request = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp_process.read_stream_until_request_message(),
+    )
+    .await??;
+
+    // This is the first request from the server, so the id should be 0 given
+    // how things are currently implemented.
+    let elicitation_request_id = RequestId::Integer(0);
+    let expected_elicitation_request = create_expected_elicitation_request(
+        elicitation_request_id.clone(),
+        shell_command.clone(),
+        workdir_for_shell_function_call.path(),
+        codex_request_id.to_string(),
+        // Internal Codex id: empirically it is 1, but this is
+        // admittedly an internal detail that could change.
+        "1".to_string(),
+    )?;
+    assert_eq!(expected_elicitation_request, elicitation_request);
+
+    // Accept the `git init` request by responding to the elicitation.
+    mcp_process
+        .send_response(
+            elicitation_request_id,
+            serde_json::to_value(ExecApprovalResponse {
+                decision: ReviewDecision::Approved,
+            })?,
+        )
+        .await?;
+
+    // Verify the original `codex` tool call completes and that `git init` ran
+    // successfully.
+    let codex_response = timeout(
+        DEFAULT_READ_TIMEOUT,
+        mcp_process.read_stream_until_response_message(RequestId::Integer(codex_request_id)),
+    )
+    .await??;
+    assert_eq!(
+        JSONRPCResponse {
+            jsonrpc: JSONRPC_VERSION.into(),
+            id: RequestId::Integer(codex_request_id),
+            result: json!({
+                "content": [
+                    {
+                        "text": "Enjoy your new git repo!",
+                        "type": "text"
+                    }
+                ]
+            }),
+        },
+        codex_response
+    );
+
+    assert!(
+        workdir_for_shell_function_call.path().join(".git").is_dir(),
+        ".git folder should have been created"
+    );
+
+    Ok(())
+}
+
+/// Create a Codex config that uses the mock server as the model provider.
+/// It also uses `approval_policy = "untrusted"` so that we exercise the
+/// elicitation code path for shell commands.
+fn create_config_toml(codex_home: &Path, server_uri: String) -> std::io::Result<()> {
+    let config_toml = codex_home.join("config.toml");
+    std::fs::write(
+        config_toml,
+        format!(
+            r#"
+model = "mock-model"
+approval_policy = "untrusted"
+sandbox_policy = "read-only"
+
+model_provider = "mock_provider"
+
+[model_providers.mock_provider]
+name = "Mock provider for test"
+base_url = "{server_uri}/v1"
+wire_api = "chat"
+request_max_retries = 0
+stream_max_retries = 0
+"#
+        ),
+    )
+}
+
+fn create_expected_elicitation_request(
+    elicitation_request_id: RequestId,
+    command: Vec<String>,
+    workdir: &Path,
+    codex_mcp_tool_call_id: String,
+    codex_event_id: String,
+) -> anyhow::Result<JSONRPCRequest> {
+    let expected_message = format!(
+        "Allow Codex to run `{}` in `{}`?",
+        shlex::try_join(command.iter().map(|s| s.as_ref()))?,
+        workdir.to_string_lossy()
+    );
+    Ok(JSONRPCRequest {
+        jsonrpc: JSONRPC_VERSION.into(),
+        id: elicitation_request_id,
+        method: ElicitRequest::METHOD.to_string(),
+        params: Some(serde_json::to_value(&ExecApprovalElicitRequestParams {
+            message: expected_message,
+            requested_schema: ElicitRequestParamsRequestedSchema {
+                r#type: "object".to_string(),
+                properties: json!({}),
+                required: None,
+            },
+            codex_elicitation: "exec-approval".to_string(),
+            codex_mcp_tool_call_id,
+            codex_event_id,
+            codex_command: command,
+            codex_cwd: workdir.to_path_buf(),
+        })?),
+    })
+}