feat: support mcp_servers in config.toml (#829)

This adds initial support for MCP servers in the style of Claude Desktop and Cursor. Note this PR is the bare minimum to get things working end to end: all configured MCP servers are launched every time Codex is run, there is no recovery for MCP servers that crash, etc.

(Also, I took a shortcut by changing some fields of `Session` to be `pub(crate)`, which also means there are circular deps between `codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a subsequent PR.)

`codex-rs/README.md` is updated as part of this PR to explain how to use this feature. There is a bit of plumbing to route the new settings from `Config` to the business logic in `codex.rs`. The most significant chunks of new code are in `mcp_connection_manager.rs` (which defines the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is responsible for tool calls.

This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd` event types to the protocol, but does not add any handlers for them. (See https://github.com/openai/codex/pull/836 for initial usage.)

To test, I added the following to my `~/.codex/config.toml`:

```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```

And then I ran the following:

```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):

This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph

Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```

Note that Codex itself is not able to make network calls, so it would not normally be able to get live weather information like this. However, the weather MCP is [currently] not run under the Codex sandbox, so it is able to hit `api.weather.gov` and fetch current weather information.

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
parent 49d040215a
commit 147a940449
11 changed files with 453 additions and 18 deletions
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -31,6 +31,8 @@ use tracing::warn;
 use crate::client::ModelClient;
 use crate::client::Prompt;
 use crate::client::ResponseEvent;
+use crate::config::Config;
+use crate::config::ConfigOverrides;
 use crate::error::CodexErr;
 use crate::error::Result as CodexResult;
 use crate::exec::process_exec_tool_call;
@@ -38,6 +40,9 @@ use crate::exec::ExecParams;
 use crate::exec::ExecToolCallOutput;
 use crate::exec::SandboxType;
 use crate::flags::OPENAI_STREAM_MAX_RETRIES;
+use crate::mcp_connection_manager::try_parse_fully_qualified_tool_name;
+use crate::mcp_connection_manager::McpConnectionManager;
+use crate::mcp_tool_call::handle_mcp_tool_call;
 use crate::models::ContentItem;
 use crate::models::FunctionCallOutputPayload;
 use crate::models::ResponseInputItem;
@@ -188,9 +193,9 @@ impl Recorder {
 /// Context for an initialized model agent
 ///
 /// A session has at most 1 running task at a time, and can be interrupted by user input.
-struct Session {
+pub(crate) struct Session {
    client: ModelClient,
-    tx_event: Sender<Event>,
+    pub(crate) tx_event: Sender<Event>,
    ctrl_c: Arc<Notify>,

    /// The session's current working directory. All relative paths provided by
@@ -202,6 +207,9 @@ struct Session {
    sandbox_policy: SandboxPolicy,
    writable_roots: Mutex<Vec<PathBuf>>,

+    /// Manager for external MCP servers/tools.
+    pub(crate) mcp_connection_manager: McpConnectionManager,
+
    /// External notifier command (will be passed as args to exec()). When
    /// `None` this feature is disabled.
    notify: Option<Vec<String>>,
@@ -433,7 +441,7 @@ impl State {
 }

 /// A series of Turns in response to user input.
-struct AgentTask {
+pub(crate) struct AgentTask {
    sess: Arc<Session>,
    sub_id: String,
    handle: AbortHandle,
@@ -554,6 +562,26 @@ async fn submission_loop(
                };

                let writable_roots = Mutex::new(get_writable_roots(&cwd));
+
+                // Load config to initialize the MCP connection manager.
+                let config = match Config::load_with_overrides(ConfigOverrides::default()) {
+                    Ok(cfg) => cfg,
+                    Err(e) => {
+                        error!("Failed to load config for MCP servers: {e:#}");
+                        // Fall back to empty server map so the session can still proceed.
+                        Config::load_default_config_for_test()
+                    }
+                };
+
+                let mcp_connection_manager =
+                    match McpConnectionManager::new(config.mcp_servers.clone()).await {
+                        Ok(mgr) => mgr,
+                        Err(e) => {
+                            error!("Failed to create MCP connection manager: {e:#}");
+                            McpConnectionManager::default()
+                        }
+                    };
+
                sess = Some(Arc::new(Session {
                    client,
                    tx_event: tx_event.clone(),
@@ -563,6 +591,7 @@ async fn submission_loop(
                    sandbox_policy,
                    cwd,
                    writable_roots,
+                    mcp_connection_manager,
                    notify,
                    state: Mutex::new(state),
                }));
@@ -753,11 +782,14 @@ async fn run_turn(
    } else {
        None
    };
+
+    let extra_tools = sess.mcp_connection_manager.list_all_tools();
    let prompt = Prompt {
        input,
        prev_id,
        instructions,
        store,
+        extra_tools,
    };

    let mut retries = 0;
@@ -1141,13 +1173,20 @@ async fn handle_function_call(
            }
        }
        _ => {
-            // Unknown function: reply with structured failure so the model can adapt.
-            ResponseInputItem::FunctionCallOutput {
-                call_id,
-                output: crate::models::FunctionCallOutputPayload {
-                    content: format!("unsupported call: {}", name),
-                    success: None,
-                },
+            match try_parse_fully_qualified_tool_name(&name) {
+                Some((server, tool_name)) => {
+                    handle_mcp_tool_call(sess, &sub_id, call_id, server, tool_name, arguments).await
+                }
+                None => {
+                    // Unknown function: reply with structured failure so the model can adapt.
+                    ResponseInputItem::FunctionCallOutput {
+                        call_id,
+                        output: crate::models::FunctionCallOutputPayload {
+                            content: format!("unsupported call: {}", name),
+                            success: None,
+                        },
+                    }
+                }
            }
        }
    }