feat(mcp): per-server startup timeout (#3182)
Seeing timeouts on certain, slow mcp server starting up when codex is invoked. Before this change, the timeout was a hard-coded 10s. Need the ability to define arbitrary timeouts on a per-server basis. ## Summary of changes - Add startup_timeout_ms to McpServerConfig with 10s default when unset - Use per-server timeout for initialize and tools/list - Introduce ManagedClient to store client and timeout; rename LIST_TOOLS_TIMEOUT to DEFAULT_STARTUP_TIMEOUT - Update docs to document startup_timeout_ms with example and options table --------- Co-authored-by: Matthew Dolan <dolan-openai@users.noreply.github.com>
This commit is contained in:
@@ -18,6 +18,10 @@ pub struct McpServerConfig {
|
|||||||
|
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub env: Option<HashMap<String, String>>,
|
pub env: Option<HashMap<String, String>>,
|
||||||
|
|
||||||
|
/// Startup timeout in milliseconds for initializing MCP server & initially listing tools.
|
||||||
|
#[serde(default)]
|
||||||
|
pub startup_timeout_ms: Option<u64>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Deserialize, Debug, Copy, Clone, PartialEq)]
|
#[derive(Deserialize, Debug, Copy, Clone, PartialEq)]
|
||||||
|
|||||||
@@ -9,6 +9,7 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::ffi::OsString;
|
use std::ffi::OsString;
|
||||||
|
use std::sync::Arc;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
@@ -36,8 +37,8 @@ use crate::config_types::McpServerConfig;
|
|||||||
const MCP_TOOL_NAME_DELIMITER: &str = "__";
|
const MCP_TOOL_NAME_DELIMITER: &str = "__";
|
||||||
const MAX_TOOL_NAME_LENGTH: usize = 64;
|
const MAX_TOOL_NAME_LENGTH: usize = 64;
|
||||||
|
|
||||||
/// Timeout for the `tools/list` request.
|
/// Default timeout for initializing MCP server & initially listing tools.
|
||||||
const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10);
|
const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10);
|
||||||
|
|
||||||
/// Map that holds a startup error for every MCP server that could **not** be
|
/// Map that holds a startup error for every MCP server that could **not** be
|
||||||
/// spawned successfully.
|
/// spawned successfully.
|
||||||
@@ -81,6 +82,11 @@ struct ToolInfo {
|
|||||||
tool: Tool,
|
tool: Tool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ManagedClient {
|
||||||
|
client: Arc<McpClient>,
|
||||||
|
startup_timeout: Duration,
|
||||||
|
}
|
||||||
|
|
||||||
/// A thin wrapper around a set of running [`McpClient`] instances.
|
/// A thin wrapper around a set of running [`McpClient`] instances.
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub(crate) struct McpConnectionManager {
|
pub(crate) struct McpConnectionManager {
|
||||||
@@ -88,7 +94,7 @@ pub(crate) struct McpConnectionManager {
|
|||||||
///
|
///
|
||||||
/// The server name originates from the keys of the `mcp_servers` map in
|
/// The server name originates from the keys of the `mcp_servers` map in
|
||||||
/// the user configuration.
|
/// the user configuration.
|
||||||
clients: HashMap<String, std::sync::Arc<McpClient>>,
|
clients: HashMap<String, ManagedClient>,
|
||||||
|
|
||||||
/// Fully qualified tool name -> tool instance.
|
/// Fully qualified tool name -> tool instance.
|
||||||
tools: HashMap<String, ToolInfo>,
|
tools: HashMap<String, ToolInfo>,
|
||||||
@@ -126,8 +132,15 @@ impl McpConnectionManager {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let startup_timeout = cfg
|
||||||
|
.startup_timeout_ms
|
||||||
|
.map(Duration::from_millis)
|
||||||
|
.unwrap_or(DEFAULT_STARTUP_TIMEOUT);
|
||||||
|
|
||||||
join_set.spawn(async move {
|
join_set.spawn(async move {
|
||||||
let McpServerConfig { command, args, env } = cfg;
|
let McpServerConfig {
|
||||||
|
command, args, env, ..
|
||||||
|
} = cfg;
|
||||||
let client_res = McpClient::new_stdio_client(
|
let client_res = McpClient::new_stdio_client(
|
||||||
command.into(),
|
command.into(),
|
||||||
args.into_iter().map(OsString::from).collect(),
|
args.into_iter().map(OsString::from).collect(),
|
||||||
@@ -154,12 +167,15 @@ impl McpConnectionManager {
|
|||||||
protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(),
|
protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(),
|
||||||
};
|
};
|
||||||
let initialize_notification_params = None;
|
let initialize_notification_params = None;
|
||||||
let timeout = Some(Duration::from_secs(10));
|
|
||||||
match client
|
match client
|
||||||
.initialize(params, initialize_notification_params, timeout)
|
.initialize(
|
||||||
|
params,
|
||||||
|
initialize_notification_params,
|
||||||
|
Some(startup_timeout),
|
||||||
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(_response) => (server_name, Ok(client)),
|
Ok(_response) => (server_name, Ok((client, startup_timeout))),
|
||||||
Err(e) => (server_name, Err(e)),
|
Err(e) => (server_name, Err(e)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -168,15 +184,20 @@ impl McpConnectionManager {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut clients: HashMap<String, std::sync::Arc<McpClient>> =
|
let mut clients: HashMap<String, ManagedClient> = HashMap::with_capacity(join_set.len());
|
||||||
HashMap::with_capacity(join_set.len());
|
|
||||||
|
|
||||||
while let Some(res) = join_set.join_next().await {
|
while let Some(res) = join_set.join_next().await {
|
||||||
let (server_name, client_res) = res?; // JoinError propagation
|
let (server_name, client_res) = res?; // JoinError propagation
|
||||||
|
|
||||||
match client_res {
|
match client_res {
|
||||||
Ok(client) => {
|
Ok((client, startup_timeout)) => {
|
||||||
clients.insert(server_name, std::sync::Arc::new(client));
|
clients.insert(
|
||||||
|
server_name,
|
||||||
|
ManagedClient {
|
||||||
|
client: Arc::new(client),
|
||||||
|
startup_timeout,
|
||||||
|
},
|
||||||
|
);
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
errors.insert(server_name, e);
|
errors.insert(server_name, e);
|
||||||
@@ -212,6 +233,7 @@ impl McpConnectionManager {
|
|||||||
.clients
|
.clients
|
||||||
.get(server)
|
.get(server)
|
||||||
.ok_or_else(|| anyhow!("unknown MCP server '{server}'"))?
|
.ok_or_else(|| anyhow!("unknown MCP server '{server}'"))?
|
||||||
|
.client
|
||||||
.clone();
|
.clone();
|
||||||
|
|
||||||
client
|
client
|
||||||
@@ -229,21 +251,18 @@ impl McpConnectionManager {
|
|||||||
|
|
||||||
/// Query every server for its available tools and return a single map that
|
/// Query every server for its available tools and return a single map that
|
||||||
/// contains **all** tools. Each key is the fully-qualified name for the tool.
|
/// contains **all** tools. Each key is the fully-qualified name for the tool.
|
||||||
async fn list_all_tools(
|
async fn list_all_tools(clients: &HashMap<String, ManagedClient>) -> Result<Vec<ToolInfo>> {
|
||||||
clients: &HashMap<String, std::sync::Arc<McpClient>>,
|
|
||||||
) -> Result<Vec<ToolInfo>> {
|
|
||||||
let mut join_set = JoinSet::new();
|
let mut join_set = JoinSet::new();
|
||||||
|
|
||||||
// Spawn one task per server so we can query them concurrently. This
|
// Spawn one task per server so we can query them concurrently. This
|
||||||
// keeps the overall latency roughly at the slowest server instead of
|
// keeps the overall latency roughly at the slowest server instead of
|
||||||
// the cumulative latency.
|
// the cumulative latency.
|
||||||
for (server_name, client) in clients {
|
for (server_name, managed_client) in clients {
|
||||||
let server_name_cloned = server_name.clone();
|
let server_name_cloned = server_name.clone();
|
||||||
let client_clone = client.clone();
|
let client_clone = managed_client.client.clone();
|
||||||
|
let startup_timeout = managed_client.startup_timeout;
|
||||||
join_set.spawn(async move {
|
join_set.spawn(async move {
|
||||||
let res = client_clone
|
let res = client_clone.list_tools(None, Some(startup_timeout)).await;
|
||||||
.list_tools(None, Some(LIST_TOOLS_TIMEOUT))
|
|
||||||
.await;
|
|
||||||
(server_name_cloned, res)
|
(server_name_cloned, res)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -334,6 +334,8 @@ Defines the list of MCP servers that Codex can consult for tool use. Currently,
|
|||||||
|
|
||||||
**Note:** Codex may cache the list of tools and resources from an MCP server so that Codex can include this information in context at startup without spawning all the servers. This is designed to save resources by loading MCP servers lazily.
|
**Note:** Codex may cache the list of tools and resources from an MCP server so that Codex can include this information in context at startup without spawning all the servers. This is designed to save resources by loading MCP servers lazily.
|
||||||
|
|
||||||
|
Each server may set `startup_timeout_ms` to adjust how long Codex waits for it to start and respond to a tools listing. The default is `10_000` (10 seconds).
|
||||||
|
|
||||||
This config option is comparable to how Claude and Cursor define `mcpServers` in their respective JSON config files, though because Codex uses TOML for its config language, the format is slightly different. For example, the following config in JSON:
|
This config option is comparable to how Claude and Cursor define `mcpServers` in their respective JSON config files, though because Codex uses TOML for its config language, the format is slightly different. For example, the following config in JSON:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
@@ -358,6 +360,8 @@ Should be represented as follows in `~/.codex/config.toml`:
|
|||||||
command = "npx"
|
command = "npx"
|
||||||
args = ["-y", "mcp-server"]
|
args = ["-y", "mcp-server"]
|
||||||
env = { "API_KEY" = "value" }
|
env = { "API_KEY" = "value" }
|
||||||
|
# Optional: override the default 10s startup timeout
|
||||||
|
startup_timeout_ms = 20_000
|
||||||
```
|
```
|
||||||
|
|
||||||
## shell_environment_policy
|
## shell_environment_policy
|
||||||
@@ -574,6 +578,7 @@ Options that are specific to the TUI.
|
|||||||
| `mcp_servers.<id>.command` | string | MCP server launcher command. |
|
| `mcp_servers.<id>.command` | string | MCP server launcher command. |
|
||||||
| `mcp_servers.<id>.args` | array<string> | MCP server args. |
|
| `mcp_servers.<id>.args` | array<string> | MCP server args. |
|
||||||
| `mcp_servers.<id>.env` | map<string,string> | MCP server env vars. |
|
| `mcp_servers.<id>.env` | map<string,string> | MCP server env vars. |
|
||||||
|
| `mcp_servers.<id>.startup_timeout_ms` | number | Startup timeout in milliseconds (default: 10_000). Timeout is applied both for initializing MCP server and initially listing tools. |
|
||||||
| `model_providers.<id>.name` | string | Display name. |
|
| `model_providers.<id>.name` | string | Display name. |
|
||||||
| `model_providers.<id>.base_url` | string | API base URL. |
|
| `model_providers.<id>.base_url` | string | API base URL. |
|
||||||
| `model_providers.<id>.env_key` | string | Env var for API key. |
|
| `model_providers.<id>.env_key` | string | Env var for API key. |
|
||||||
|
|||||||
Reference in New Issue
Block a user