feat: make Codex available as a tool when running it as an MCP server (#811)

This PR replaces the placeholder `"echo"` tool call in the MCP server with a `"codex"` tool that calls Codex. Events such as `ExecApprovalRequest` and `ApplyPatchApprovalRequest` are not handled properly yet, but I have `approval_policy = "never"` set in my `~/.codex/config.toml` such that those codepaths are not exercised. The schema for this MPC tool is defined by a new `CodexToolCallParam` struct introduced in this PR. It is fairly similar to `ConfigOverrides`, as the param is used to help create the `Config` used to start the Codex session, though it also includes the `prompt` used to kick off the session. This PR also introduces the use of the third-party `schemars` crate to generate the JSON schema, which is verified in the `verify_codex_tool_json_schema()` unit test. Events that are dispatched during the Codex session are sent back to the MCP client as MCP notifications. This gives the client a way to monitor progress as the tool call itself may take minutes to complete depending on the complexity of the task requested by the user. In the video below, I launched the server via: ```shell mcp-server$ RUST_LOG=debug npx @modelcontextprotocol/inspector cargo run -- ``` In the video, you can see the flow of: * requesting the list of tools * choosing the **codex** tool * entering a value for **prompt** and then making the tool call Note that I left the other fields blank because when unspecified, the values in my `~/.codex/config.toml` were used: https://github.com/user-attachments/assets/1975058c-b004-43ef-8c8d-800a953b8192 Note that while using the inspector, I did run into https://github.com/modelcontextprotocol/inspector/issues/293, though the tip about ensuring I had only one instance of the **MCP Inspector** tab open in my browser seemed to fix things.
2025-05-05 07:16:19 -07:00
parent 5d924d44cf
commit 2b72d05c5e
6 changed files with 548 additions and 41 deletions
--- a/codex-rs/mcp-server/src/codex_tool_config.rs
+++ b/codex-rs/mcp-server/src/codex_tool_config.rs
@@ -0,0 +1,244 @@
+//! Configuration object accepted by the `codex` MCP tool-call.
+
+use std::path::PathBuf;
+
+use mcp_types::Tool;
+use mcp_types::ToolInputSchema;
+use schemars::r#gen::SchemaSettings;
+use schemars::JsonSchema;
+use serde::Deserialize;
+
+use codex_core::protocol::AskForApproval;
+use codex_core::protocol::SandboxPolicy;
+
+/// Client-supplied configuration for a `codex` tool-call.
+#[derive(Debug, Clone, Deserialize, JsonSchema)]
+#[serde(rename_all = "kebab-case")]
+pub(crate) struct CodexToolCallParam {
+    /// The *initial user prompt* to start the Codex conversation.
+    pub prompt: String,
+
+    /// Optional override for the model name (e.g. "o3", "o4-mini")
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+
+    /// Working directory for the session. If relative, it is resolved against
+    /// the server process's current working directory.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub cwd: Option<String>,
+
+    /// Execution approval policy expressed as the kebab-case variant name
+    /// (`unless-allow-listed`, `auto-edit`, `on-failure`, `never`).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub approval_policy: Option<CodexToolCallApprovalPolicy>,
+
+    /// Sandbox permissions using the same string values accepted by the CLI
+    /// (e.g. "disk-write-cwd", "network-full-access").
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub sandbox_permissions: Option<Vec<CodexToolCallSandboxPermission>>,
+
+    /// Disable server-side response storage.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub disable_response_storage: Option<bool>,
+    // Custom system instructions.
+    // #[serde(default, skip_serializing_if = "Option::is_none")]
+    // pub instructions: Option<String>,
+}
+
+// Create custom enums for use with `CodexToolCallApprovalPolicy` where we
+// intentionally exclude docstrings from the generated schema because they
+// introduce anyOf in the the generated JSON schema, which makes it more complex
+// without adding any real value since we aspire to use self-descriptive names.
+
+#[derive(Debug, Clone, Deserialize, JsonSchema)]
+#[serde(rename_all = "kebab-case")]
+pub(crate) enum CodexToolCallApprovalPolicy {
+    AutoEdit,
+    UnlessAllowListed,
+    OnFailure,
+    Never,
+}
+
+impl From<CodexToolCallApprovalPolicy> for AskForApproval {
+    fn from(value: CodexToolCallApprovalPolicy) -> Self {
+        match value {
+            CodexToolCallApprovalPolicy::AutoEdit => AskForApproval::AutoEdit,
+            CodexToolCallApprovalPolicy::UnlessAllowListed => AskForApproval::UnlessAllowListed,
+            CodexToolCallApprovalPolicy::OnFailure => AskForApproval::OnFailure,
+            CodexToolCallApprovalPolicy::Never => AskForApproval::Never,
+        }
+    }
+}
+
+// TODO: Support additional writable folders via a separate property on
+// CodexToolCallParam.
+
+#[derive(Debug, Clone, Deserialize, JsonSchema)]
+#[serde(rename_all = "kebab-case")]
+pub(crate) enum CodexToolCallSandboxPermission {
+    DiskFullReadAccess,
+    DiskWriteCwd,
+    DiskWritePlatformUserTempFolder,
+    DiskWritePlatformGlobalTempFolder,
+    DiskFullWriteAccess,
+    NetworkFullAccess,
+}
+
+impl From<CodexToolCallSandboxPermission> for codex_core::protocol::SandboxPermission {
+    fn from(value: CodexToolCallSandboxPermission) -> Self {
+        match value {
+            CodexToolCallSandboxPermission::DiskFullReadAccess => {
+                codex_core::protocol::SandboxPermission::DiskFullReadAccess
+            }
+            CodexToolCallSandboxPermission::DiskWriteCwd => {
+                codex_core::protocol::SandboxPermission::DiskWriteCwd
+            }
+            CodexToolCallSandboxPermission::DiskWritePlatformUserTempFolder => {
+                codex_core::protocol::SandboxPermission::DiskWritePlatformUserTempFolder
+            }
+            CodexToolCallSandboxPermission::DiskWritePlatformGlobalTempFolder => {
+                codex_core::protocol::SandboxPermission::DiskWritePlatformGlobalTempFolder
+            }
+            CodexToolCallSandboxPermission::DiskFullWriteAccess => {
+                codex_core::protocol::SandboxPermission::DiskFullWriteAccess
+            }
+            CodexToolCallSandboxPermission::NetworkFullAccess => {
+                codex_core::protocol::SandboxPermission::NetworkFullAccess
+            }
+        }
+    }
+}
+
+pub(crate) fn create_tool_for_codex_tool_call_param() -> Tool {
+    let schema = SchemaSettings::draft2019_09()
+        .with(|s| {
+            s.inline_subschemas = true;
+            s.option_add_null_type = false
+        })
+        .into_generator()
+        .into_root_schema_for::<CodexToolCallParam>();
+    let schema_value =
+        serde_json::to_value(&schema).expect("Codex tool schema should serialise to JSON");
+
+    let tool_input_schema =
+        serde_json::from_value::<ToolInputSchema>(schema_value).unwrap_or_else(|e| {
+            panic!("failed to create Tool from schema: {e}");
+        });
+    Tool {
+        name: "codex".to_string(),
+        input_schema: tool_input_schema,
+        description: Some(
+            "Run a Codex session. Accepts configuration parameters matching the Codex Config struct."
+                .to_string(),
+        ),
+        annotations: None,
+    }
+}
+
+impl CodexToolCallParam {
+    /// Returns the initial user prompt to start the Codex conversation and the
+    /// Config.
+    pub fn into_config(self) -> std::io::Result<(String, codex_core::config::Config)> {
+        let Self {
+            prompt,
+            model,
+            cwd,
+            approval_policy,
+            sandbox_permissions,
+            disable_response_storage,
+        } = self;
+        let sandbox_policy = sandbox_permissions.map(|perms| {
+            SandboxPolicy::from(perms.into_iter().map(Into::into).collect::<Vec<_>>())
+        });
+
+        // Build ConfigOverrides recognised by codex-core.
+        let overrides = codex_core::config::ConfigOverrides {
+            model,
+            cwd: cwd.map(PathBuf::from),
+            approval_policy: approval_policy.map(Into::into),
+            sandbox_policy,
+            disable_response_storage,
+        };
+
+        let cfg = codex_core::config::Config::load_with_overrides(overrides)?;
+
+        Ok((prompt, cfg))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+
+    /// We include a test to verify the exact JSON schema as "executable
+    /// documentation" for the schema. When can track changes to this test as a
+    /// way to audit changes to the generated schema.
+    ///
+    /// Seeing the fully expanded schema makes it easier to casually verify that
+    /// the generated JSON for enum types such as "approval-policy" is compact.
+    /// Ideally, modelcontextprotocol/inspector would provide a simpler UI for
+    /// enum fields versus open string fields to take advantage of this.
+    ///
+    /// As of 2025-05-04, there is an open PR for this:
+    /// https://github.com/modelcontextprotocol/inspector/pull/196
+    #[test]
+    fn verify_codex_tool_json_schema() {
+        let tool = create_tool_for_codex_tool_call_param();
+        let tool_json = serde_json::to_value(&tool).expect("tool serializes");
+        let expected_tool_json = serde_json::json!({
+          "name": "codex",
+          "description": "Run a Codex session. Accepts configuration parameters matching the Codex Config struct.",
+          "inputSchema": {
+            "type": "object",
+            "properties": {
+              "approval-policy": {
+                "description": "Execution approval policy expressed as the kebab-case variant name (`unless-allow-listed`, `auto-edit`, `on-failure`, `never`).",
+                "enum": [
+                  "auto-edit",
+                  "unless-allow-listed",
+                  "on-failure",
+                  "never"
+                ],
+                "type": "string"
+              },
+              "cwd": {
+                "description": "Working directory for the session. If relative, it is resolved against the server process's current working directory.",
+                "type": "string"
+              },
+              "disable-response-storage": {
+                "description": "Disable server-side response storage.",
+                "type": "boolean"
+              },
+              "model": {
+                "description": "Optional override for the model name (e.g. \"o3\", \"o4-mini\")",
+                "type": "string"
+              },
+              "prompt": {
+                "description": "The *initial user prompt* to start the Codex conversation.",
+                "type": "string"
+              },
+              "sandbox-permissions": {
+                "description": "Sandbox permissions using the same string values accepted by the CLI (e.g. \"disk-write-cwd\", \"network-full-access\").",
+                "items": {
+                  "enum": [
+                    "disk-full-read-access",
+                    "disk-write-cwd",
+                    "disk-write-platform-user-temp-folder",
+                    "disk-write-platform-global-temp-folder",
+                    "disk-full-write-access",
+                    "network-full-access"
+                  ],
+                  "type": "string"
+                },
+                "type": "array"
+              }
+            },
+            "required": [
+              "prompt"
+            ]
+          }
+        });
+        assert_eq!(expected_tool_json, tool_json);
+    }
+}
--- a/codex-rs/mcp-server/src/codex_tool_runner.rs
+++ b/codex-rs/mcp-server/src/codex_tool_runner.rs
@@ -0,0 +1,181 @@
+//! Asynchronous worker that executes a **Codex** tool-call inside a spawned
+//! Tokio task. Separated from `message_processor.rs` to keep that file small
+//! and to make future feature-growth easier to manage.
+
+use codex_core::codex_wrapper::init_codex;
+use codex_core::config::Config as CodexConfig;
+use codex_core::protocol::Event;
+use codex_core::protocol::EventMsg;
+use codex_core::protocol::InputItem;
+use codex_core::protocol::Op;
+use mcp_types::CallToolResult;
+use mcp_types::CallToolResultContent;
+use mcp_types::JSONRPCMessage;
+use mcp_types::JSONRPCResponse;
+use mcp_types::RequestId;
+use mcp_types::TextContent;
+use mcp_types::JSONRPC_VERSION;
+use tokio::sync::mpsc::Sender;
+
+/// Convert a Codex [`Event`] to an MCP notification.
+fn codex_event_to_notification(event: &Event) -> JSONRPCMessage {
+    JSONRPCMessage::Notification(mcp_types::JSONRPCNotification {
+        jsonrpc: JSONRPC_VERSION.into(),
+        method: "codex/event".into(),
+        params: Some(serde_json::to_value(event).expect("Event must serialize")),
+    })
+}
+
+/// Run a complete Codex session and stream events back to the client.
+///
+/// On completion (success or error) the function sends the appropriate
+/// `tools/call` response so the LLM can continue the conversation.
+pub async fn run_codex_tool_session(
+    id: RequestId,
+    initial_prompt: String,
+    config: CodexConfig,
+    outgoing: Sender<JSONRPCMessage>,
+) {
+    let (codex, first_event, _ctrl_c) = match init_codex(config).await {
+        Ok(res) => res,
+        Err(e) => {
+            let result = CallToolResult {
+                content: vec![CallToolResultContent::TextContent(TextContent {
+                    r#type: "text".to_string(),
+                    text: format!("Failed to start Codex session: {e}"),
+                    annotations: None,
+                })],
+                is_error: Some(true),
+            };
+            let _ = outgoing
+                .send(JSONRPCMessage::Response(JSONRPCResponse {
+                    jsonrpc: JSONRPC_VERSION.into(),
+                    id,
+                    result: result.into(),
+                }))
+                .await;
+            return;
+        }
+    };
+
+    // Send initial SessionConfigured event.
+    let _ = outgoing
+        .send(codex_event_to_notification(&first_event))
+        .await;
+
+    if let Err(e) = codex
+        .submit(Op::UserInput {
+            items: vec![InputItem::Text {
+                text: initial_prompt.clone(),
+            }],
+        })
+        .await
+    {
+        tracing::error!("Failed to submit initial prompt: {e}");
+    }
+
+    let mut last_agent_message: Option<String> = None;
+
+    // Stream events until the task needs to pause for user interaction or
+    // completes.
+    loop {
+        match codex.next_event().await {
+            Ok(event) => {
+                let _ = outgoing.send(codex_event_to_notification(&event)).await;
+
+                match &event.msg {
+                    EventMsg::AgentMessage { message } => {
+                        last_agent_message = Some(message.clone());
+                    }
+                    EventMsg::ExecApprovalRequest { .. } => {
+                        let result = CallToolResult {
+                            content: vec![CallToolResultContent::TextContent(TextContent {
+                                r#type: "text".to_string(),
+                                text: "EXEC_APPROVAL_REQUIRED".to_string(),
+                                annotations: None,
+                            })],
+                            is_error: None,
+                        };
+                        let _ = outgoing
+                            .send(JSONRPCMessage::Response(JSONRPCResponse {
+                                jsonrpc: JSONRPC_VERSION.into(),
+                                id: id.clone(),
+                                result: result.into(),
+                            }))
+                            .await;
+                        break;
+                    }
+                    EventMsg::ApplyPatchApprovalRequest { .. } => {
+                        let result = CallToolResult {
+                            content: vec![CallToolResultContent::TextContent(TextContent {
+                                r#type: "text".to_string(),
+                                text: "PATCH_APPROVAL_REQUIRED".to_string(),
+                                annotations: None,
+                            })],
+                            is_error: None,
+                        };
+                        let _ = outgoing
+                            .send(JSONRPCMessage::Response(JSONRPCResponse {
+                                jsonrpc: JSONRPC_VERSION.into(),
+                                id: id.clone(),
+                                result: result.into(),
+                            }))
+                            .await;
+                        break;
+                    }
+                    EventMsg::TaskComplete => {
+                        let result = if let Some(msg) = last_agent_message {
+                            CallToolResult {
+                                content: vec![CallToolResultContent::TextContent(TextContent {
+                                    r#type: "text".to_string(),
+                                    text: msg,
+                                    annotations: None,
+                                })],
+                                is_error: None,
+                            }
+                        } else {
+                            CallToolResult {
+                                content: vec![CallToolResultContent::TextContent(TextContent {
+                                    r#type: "text".to_string(),
+                                    text: String::new(),
+                                    annotations: None,
+                                })],
+                                is_error: None,
+                            }
+                        };
+                        let _ = outgoing
+                            .send(JSONRPCMessage::Response(JSONRPCResponse {
+                                jsonrpc: JSONRPC_VERSION.into(),
+                                id: id.clone(),
+                                result: result.into(),
+                            }))
+                            .await;
+                        break;
+                    }
+                    EventMsg::SessionConfigured { .. } => {
+                        tracing::error!("unexpected SessionConfigured event");
+                    }
+                    _ => {}
+                }
+            }
+            Err(e) => {
+                let result = CallToolResult {
+                    content: vec![CallToolResultContent::TextContent(TextContent {
+                        r#type: "text".to_string(),
+                        text: format!("Codex runtime error: {e}"),
+                        annotations: None,
+                    })],
+                    is_error: Some(true),
+                };
+                let _ = outgoing
+                    .send(JSONRPCMessage::Response(JSONRPCResponse {
+                        jsonrpc: JSONRPC_VERSION.into(),
+                        id: id.clone(),
+                        result: result.into(),
+                    }))
+                    .await;
+                break;
+            }
+        }
+    }
+}
--- a/codex-rs/mcp-server/src/main.rs
+++ b/codex-rs/mcp-server/src/main.rs
@@ -1,4 +1,5 @@
 //! Prototype MCP server.
+#![deny(clippy::print_stdout, clippy::print_stderr)]

 use std::io::Result as IoResult;

@@ -12,7 +13,10 @@ use tracing::debug;
 use tracing::error;
 use tracing::info;

+mod codex_tool_config;
+mod codex_tool_runner;
 mod message_processor;
+
 use crate::message_processor::MessageProcessor;

 /// Size of the bounded channels used to communicate between tasks. The value
--- a/codex-rs/mcp-server/src/message_processor.rs
+++ b/codex-rs/mcp-server/src/message_processor.rs
@@ -1,6 +1,9 @@
-//! Very small proof-of-concept request router for the MCP prototype server.
+use crate::codex_tool_config::create_tool_for_codex_tool_call_param;
+use crate::codex_tool_config::CodexToolCallParam;

+use codex_core::config::Config as CodexConfig;
 use mcp_types::CallToolRequestParams;
+use mcp_types::CallToolResult;
 use mcp_types::CallToolResultContent;
 use mcp_types::ClientRequest;
 use mcp_types::JSONRPCBatchRequest;
@@ -17,11 +20,10 @@ use mcp_types::RequestId;
 use mcp_types::ServerCapabilitiesTools;
 use mcp_types::ServerNotification;
 use mcp_types::TextContent;
-use mcp_types::Tool;
-use mcp_types::ToolInputSchema;
 use mcp_types::JSONRPC_VERSION;
 use serde_json::json;
 use tokio::sync::mpsc;
+use tokio::task;

 pub(crate) struct MessageProcessor {
    outgoing: mpsc::Sender<JSONRPCMessage>,
@@ -303,21 +305,7 @@ impl MessageProcessor {
    ) {
        tracing::trace!("tools/list -> {params:?}");
        let result = ListToolsResult {
-            tools: vec![Tool {
-                name: "echo".to_string(),
-                input_schema: ToolInputSchema {
-                    r#type: "object".to_string(),
-                    properties: Some(json!({
-                        "input": {
-                            "type": "string",
-                            "description": "The input to echo back"
-                        }
-                    })),
-                    required: Some(vec!["input".to_string()]),
-                },
-                description: Some("Echoes the request back".to_string()),
-                annotations: None,
-            }],
+            tools: vec![create_tool_for_codex_tool_call_param()],
            next_cursor: None,
        };

@@ -331,26 +319,80 @@ impl MessageProcessor {
    ) {
        tracing::info!("tools/call -> params: {:?}", params);
        let CallToolRequestParams { name, arguments } = params;
-        match name.as_str() {
-            "echo" => {
-                let result = mcp_types::CallToolResult {
+
+        // We only support the "codex" tool for now.
+        if name != "codex" {
+            // Tool not found – return error result so the LLM can react.
+            let result = CallToolResult {
+                content: vec![CallToolResultContent::TextContent(TextContent {
+                    r#type: "text".to_string(),
+                    text: format!("Unknown tool '{name}'"),
+                    annotations: None,
+                })],
+                is_error: Some(true),
+            };
+            self.send_response::<mcp_types::CallToolRequest>(id, result);
+            return;
+        }
+
+        let (initial_prompt, config): (String, CodexConfig) = match arguments {
+            Some(json_val) => match serde_json::from_value::<CodexToolCallParam>(json_val) {
+                Ok(tool_cfg) => match tool_cfg.into_config() {
+                    Ok(cfg) => cfg,
+                    Err(e) => {
+                        let result = CallToolResult {
+                            content: vec![CallToolResultContent::TextContent(TextContent {
+                                r#type: "text".to_owned(),
+                                text: format!(
+                                    "Failed to load Codex configuration from overrides: {e}"
+                                ),
+                                annotations: None,
+                            })],
+                            is_error: Some(true),
+                        };
+                        self.send_response::<mcp_types::CallToolRequest>(id, result);
+                        return;
+                    }
+                },
+                Err(e) => {
+                    let result = CallToolResult {
+                        content: vec![CallToolResultContent::TextContent(TextContent {
+                            r#type: "text".to_owned(),
+                            text: format!("Failed to parse configuration for Codex tool: {e}"),
+                            annotations: None,
+                        })],
+                        is_error: Some(true),
+                    };
+                    self.send_response::<mcp_types::CallToolRequest>(id, result);
+                    return;
+                }
+            },
+            None => {
+                let result = CallToolResult {
                    content: vec![CallToolResultContent::TextContent(TextContent {
                        r#type: "text".to_string(),
-                        text: format!("Echo: {arguments:?}"),
+                        text:
+                            "Missing arguments for codex tool-call; the `prompt` field is required."
+                                .to_string(),
                        annotations: None,
                    })],
-                    is_error: None,
-                };
-                self.send_response::<mcp_types::CallToolRequest>(id, result);
-            }
-            _ => {
-                let result = mcp_types::CallToolResult {
-                    content: vec![],
                    is_error: Some(true),
                };
                self.send_response::<mcp_types::CallToolRequest>(id, result);
+                return;
            }
-        }
+        };
+
+        // Clone outgoing sender to move into async task.
+        let outgoing = self.outgoing.clone();
+
+        // Spawn an async task to handle the Codex session so that we do not
+        // block the synchronous message-processing loop.
+        task::spawn(async move {
+            // Run the Codex session and stream events back to the client.
+            crate::codex_tool_runner::run_codex_tool_session(id, initial_prompt, config, outgoing)
+                .await;
+        });
    }

    fn handle_set_level(