feat: support mcp_servers in config.toml (#829)

This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.

(Also, I took some shortcuts: some fields of `Session` are now
`pub(crate)`, and there are circular deps between `codex.rs` and
`mcp_tool_call.rs`, but I will clean that up in a subsequent PR.)

`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks of new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for making the tool calls.
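
To give a rough idea of the shape, here is a minimal sketch of the
connection manager (not the code from this PR; `McpClient` and the field
layout are assumptions):

```rust
use std::collections::HashMap;

/// Placeholder for the per-server client type (an assumption, not the
/// actual type from this PR).
struct McpClient;

/// Sketch: one running client per configured MCP server, plus an
/// aggregated tool table keyed by the "fully qualified" tool name
/// (prefixed with the server name) so tools from different servers
/// cannot collide.
pub(crate) struct McpConnectionManager {
    /// Running clients, keyed by the server name from `config.toml`.
    clients: HashMap<String, McpClient>,
    /// Tools advertised by all servers, keyed by fully qualified name.
    tools: HashMap<String, mcp_types::Tool>,
}
```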

This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
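
Sketched roughly, the new events are ordinary variants on the protocol's
event enum (the payload fields below are assumptions, not the actual
definitions from this PR):

```rust
/// Sketch only: begin/end events for MCP tool calls.
pub enum EventMsg {
    // ... existing variants ...
    McpToolCallBegin {
        /// Correlates the begin/end pair for one tool call.
        call_id: String,
        server: String,
        tool: String,
    },
    McpToolCallEnd {
        call_id: String,
        success: bool,
    },
}
```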

To test, I added the following to my `~/.codex/config.toml`:

```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
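
Any stdio-based MCP server can be configured the same way; for example,
a hypothetical entry for an `npx`-installable server would look like:

```toml
# Hypothetical entry: any npx-installable MCP server should work similarly.
[mcp_servers.everything]
command = "npx"
args = ["-y", "@modelcontextprotocol/server-everything"]
```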

And then I ran the following:

```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):

This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph

Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```

Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP server is [currently] not run under the Codex sandbox,
so it is able to hit `api.weather.gov` and fetch current weather
information.

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
Commit: 147a940449 (parent: 49d040215a)
Author: Michael Bolin (committed by GitHub)
Date: 2025-05-06 15:47:59 -07:00
11 changed files with 453 additions and 18 deletions


```diff
@@ -1,4 +1,5 @@
 use std::collections::BTreeMap;
+use std::collections::HashMap;
 use std::io::BufRead;
 use std::path::Path;
 use std::pin::Pin;
@@ -13,6 +14,7 @@ use futures::prelude::*;
 use reqwest::StatusCode;
 use serde::Deserialize;
 use serde::Serialize;
+use serde_json::json;
 use serde_json::Value;
 use tokio::sync::mpsc;
 use tokio::time::timeout;
@@ -42,6 +44,11 @@ pub struct Prompt {
     pub instructions: Option<String>,
     /// Whether to store response on server side (disable_response_storage = !store).
     pub store: bool,
+
+    /// Additional tools sourced from external MCP servers. Note each key is
+    /// the "fully qualified" tool name (i.e., prefixed with the server name),
+    /// which should be reported to the model in place of Tool::name.
+    pub extra_tools: HashMap<String, mcp_types::Tool>,
 }
 
 #[derive(Debug)]
@@ -59,7 +66,7 @@ struct Payload<'a> {
     // we code defensively to avoid this case, but perhaps we should use a
     // separate enum for serialization.
     input: &'a Vec<ResponseItem>,
-    tools: &'a [Tool],
+    tools: &'a [serde_json::Value],
     tool_choice: &'static str,
     parallel_tool_calls: bool,
     reasoning: Option<Reasoning>,
@@ -77,11 +84,12 @@ struct Reasoning {
     generate_summary: Option<bool>,
 }
 
+/// When serialized as JSON, this produces a valid "Tool" in the OpenAI
+/// Responses API.
 #[derive(Debug, Serialize)]
-struct Tool {
+struct ResponsesApiTool {
     name: &'static str,
-    #[serde(rename = "type")]
-    kind: &'static str, // "function"
+    r#type: &'static str, // "function"
     description: &'static str,
     strict: bool,
     parameters: JsonSchema,
@@ -105,7 +113,7 @@ enum JsonSchema {
 }
 
 /// Tool usage specification
-static TOOLS: LazyLock<Vec<Tool>> = LazyLock::new(|| {
+static DEFAULT_TOOLS: LazyLock<Vec<ResponsesApiTool>> = LazyLock::new(|| {
     let mut properties = BTreeMap::new();
     properties.insert(
         "command".to_string(),
@@ -116,9 +124,9 @@ static TOOLS: LazyLock<Vec<Tool>> = LazyLock::new(|| {
     properties.insert("workdir".to_string(), JsonSchema::String);
     properties.insert("timeout".to_string(), JsonSchema::Number);
 
-    vec![Tool {
+    vec![ResponsesApiTool {
         name: "shell",
-        kind: "function",
+        r#type: "function",
         description: "Runs a shell command, and returns its output.",
         strict: false,
         parameters: JsonSchema::Object {
@@ -149,11 +157,26 @@ impl ModelClient {
             return stream_from_fixture(path).await;
         }
 
+        // Assemble tool list: built-in tools + any extra tools from the prompt.
+        let mut tools_json: Vec<serde_json::Value> = DEFAULT_TOOLS
+            .iter()
+            .map(|t| serde_json::to_value(t).expect("serialize builtin tool"))
+            .collect();
+        tools_json.extend(
+            prompt
+                .extra_tools
+                .clone()
+                .into_iter()
+                .map(|(name, tool)| mcp_tool_to_openai_tool(name, tool)),
+        );
+
+        debug!("tools_json: {}", serde_json::to_string_pretty(&tools_json)?);
+
         let payload = Payload {
             model: &self.model,
             instructions: prompt.instructions.as_ref(),
             input: &prompt.input,
-            tools: &TOOLS,
+            tools: &tools_json,
             tool_choice: "auto",
             parallel_tool_calls: false,
             reasoning: Some(Reasoning {
@@ -235,6 +258,20 @@ impl ModelClient {
     }
 }
 
+fn mcp_tool_to_openai_tool(
+    fully_qualified_name: String,
+    tool: mcp_types::Tool,
+) -> serde_json::Value {
+    // TODO(mbolin): Change the contract of this function to return
+    // ResponsesApiTool.
+    json!({
+        "name": fully_qualified_name,
+        "description": tool.description,
+        "parameters": tool.input_schema,
+        "type": "function",
+    })
+}
+
 #[derive(Debug, Deserialize, Serialize)]
 struct SseEvent {
     #[serde(rename = "type")]
```
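
For context on how `extra_tools` in the diff above is meant to be
populated, here is a sketch of the caller side (simplified;
`list_all_tools` and the `"__"` separator are illustrative assumptions,
not the actual API):

```rust
use std::collections::HashMap;

// Sketch: aggregate tools from every connected MCP server under fully
// qualified names, then hand them to the model client via the Prompt.
fn build_prompt(
    connection_manager: &McpConnectionManager,
    response_items: Vec<ResponseItem>,
) -> Prompt {
    let mut extra_tools: HashMap<String, mcp_types::Tool> = HashMap::new();
    for (server_name, tools) in connection_manager.list_all_tools() {
        for tool in tools {
            // The fully qualified name is what the model sees as the tool
            // name, so it must be unique across servers.
            let fully_qualified_name = format!("{server_name}__{}", tool.name);
            extra_tools.insert(fully_qualified_name, tool);
        }
    }
    Prompt {
        instructions: None,
        store: true,
        input: response_items,
        extra_tools,
    }
}
```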