From 6efb52e545aad2b15c102d51ce1fd174b345021e Mon Sep 17 00:00:00 2001 From: dolan <173844978+dolan-openai@users.noreply.github.com> Date: Mon, 8 Sep 2025 08:12:08 -0700 Subject: [PATCH] feat(mcp): per-server startup timeout (#3182) Seeing timeouts on certain, slow mcp server starting up when codex is invoked. Before this change, the timeout was a hard-coded 10s. Need the ability to define arbitrary timeouts on a per-server basis. ## Summary of changes - Add startup_timeout_ms to McpServerConfig with 10s default when unset - Use per-server timeout for initialize and tools/list - Introduce ManagedClient to store client and timeout; rename LIST_TOOLS_TIMEOUT to DEFAULT_STARTUP_TIMEOUT - Update docs to document startup_timeout_ms with example and options table --------- Co-authored-by: Matthew Dolan --- codex-rs/core/src/config_types.rs | 4 ++ codex-rs/core/src/mcp_connection_manager.rs | 57 ++++++++++++++------- docs/config.md | 5 ++ 3 files changed, 47 insertions(+), 19 deletions(-) diff --git a/codex-rs/core/src/config_types.rs b/codex-rs/core/src/config_types.rs index 27c2712b..0722dfc0 100644 --- a/codex-rs/core/src/config_types.rs +++ b/codex-rs/core/src/config_types.rs @@ -18,6 +18,10 @@ pub struct McpServerConfig { #[serde(default)] pub env: Option>, + + /// Startup timeout in milliseconds for initializing MCP server & initially listing tools. + #[serde(default)] + pub startup_timeout_ms: Option, } #[derive(Deserialize, Debug, Copy, Clone, PartialEq)] diff --git a/codex-rs/core/src/mcp_connection_manager.rs b/codex-rs/core/src/mcp_connection_manager.rs index b5813a04..563a4e13 100644 --- a/codex-rs/core/src/mcp_connection_manager.rs +++ b/codex-rs/core/src/mcp_connection_manager.rs @@ -9,6 +9,7 @@ use std::collections::HashMap; use std::collections::HashSet; use std::ffi::OsString; +use std::sync::Arc; use std::time::Duration; use anyhow::Context; @@ -36,8 +37,8 @@ use crate::config_types::McpServerConfig; const MCP_TOOL_NAME_DELIMITER: &str = "__"; const MAX_TOOL_NAME_LENGTH: usize = 64; -/// Timeout for the `tools/list` request. -const LIST_TOOLS_TIMEOUT: Duration = Duration::from_secs(10); +/// Default timeout for initializing MCP server & initially listing tools. +const DEFAULT_STARTUP_TIMEOUT: Duration = Duration::from_secs(10); /// Map that holds a startup error for every MCP server that could **not** be /// spawned successfully. @@ -81,6 +82,11 @@ struct ToolInfo { tool: Tool, } +struct ManagedClient { + client: Arc, + startup_timeout: Duration, +} + /// A thin wrapper around a set of running [`McpClient`] instances. #[derive(Default)] pub(crate) struct McpConnectionManager { @@ -88,7 +94,7 @@ pub(crate) struct McpConnectionManager { /// /// The server name originates from the keys of the `mcp_servers` map in /// the user configuration. - clients: HashMap>, + clients: HashMap, /// Fully qualified tool name -> tool instance. tools: HashMap, @@ -126,8 +132,15 @@ impl McpConnectionManager { continue; } + let startup_timeout = cfg + .startup_timeout_ms + .map(Duration::from_millis) + .unwrap_or(DEFAULT_STARTUP_TIMEOUT); + join_set.spawn(async move { - let McpServerConfig { command, args, env } = cfg; + let McpServerConfig { + command, args, env, .. + } = cfg; let client_res = McpClient::new_stdio_client( command.into(), args.into_iter().map(OsString::from).collect(), @@ -154,12 +167,15 @@ impl McpConnectionManager { protocol_version: mcp_types::MCP_SCHEMA_VERSION.to_owned(), }; let initialize_notification_params = None; - let timeout = Some(Duration::from_secs(10)); match client - .initialize(params, initialize_notification_params, timeout) + .initialize( + params, + initialize_notification_params, + Some(startup_timeout), + ) .await { - Ok(_response) => (server_name, Ok(client)), + Ok(_response) => (server_name, Ok((client, startup_timeout))), Err(e) => (server_name, Err(e)), } } @@ -168,15 +184,20 @@ impl McpConnectionManager { }); } - let mut clients: HashMap> = - HashMap::with_capacity(join_set.len()); + let mut clients: HashMap = HashMap::with_capacity(join_set.len()); while let Some(res) = join_set.join_next().await { let (server_name, client_res) = res?; // JoinError propagation match client_res { - Ok(client) => { - clients.insert(server_name, std::sync::Arc::new(client)); + Ok((client, startup_timeout)) => { + clients.insert( + server_name, + ManagedClient { + client: Arc::new(client), + startup_timeout, + }, + ); } Err(e) => { errors.insert(server_name, e); @@ -212,6 +233,7 @@ impl McpConnectionManager { .clients .get(server) .ok_or_else(|| anyhow!("unknown MCP server '{server}'"))? + .client .clone(); client @@ -229,21 +251,18 @@ impl McpConnectionManager { /// Query every server for its available tools and return a single map that /// contains **all** tools. Each key is the fully-qualified name for the tool. -async fn list_all_tools( - clients: &HashMap>, -) -> Result> { +async fn list_all_tools(clients: &HashMap) -> Result> { let mut join_set = JoinSet::new(); // Spawn one task per server so we can query them concurrently. This // keeps the overall latency roughly at the slowest server instead of // the cumulative latency. - for (server_name, client) in clients { + for (server_name, managed_client) in clients { let server_name_cloned = server_name.clone(); - let client_clone = client.clone(); + let client_clone = managed_client.client.clone(); + let startup_timeout = managed_client.startup_timeout; join_set.spawn(async move { - let res = client_clone - .list_tools(None, Some(LIST_TOOLS_TIMEOUT)) - .await; + let res = client_clone.list_tools(None, Some(startup_timeout)).await; (server_name_cloned, res) }); } diff --git a/docs/config.md b/docs/config.md index d17eab47..a7f52249 100644 --- a/docs/config.md +++ b/docs/config.md @@ -334,6 +334,8 @@ Defines the list of MCP servers that Codex can consult for tool use. Currently, **Note:** Codex may cache the list of tools and resources from an MCP server so that Codex can include this information in context at startup without spawning all the servers. This is designed to save resources by loading MCP servers lazily. +Each server may set `startup_timeout_ms` to adjust how long Codex waits for it to start and respond to a tools listing. The default is `10_000` (10 seconds). + This config option is comparable to how Claude and Cursor define `mcpServers` in their respective JSON config files, though because Codex uses TOML for its config language, the format is slightly different. For example, the following config in JSON: ```json @@ -358,6 +360,8 @@ Should be represented as follows in `~/.codex/config.toml`: command = "npx" args = ["-y", "mcp-server"] env = { "API_KEY" = "value" } +# Optional: override the default 10s startup timeout +startup_timeout_ms = 20_000 ``` ## shell_environment_policy @@ -574,6 +578,7 @@ Options that are specific to the TUI. | `mcp_servers..command` | string | MCP server launcher command. | | `mcp_servers..args` | array | MCP server args. | | `mcp_servers..env` | map | MCP server env vars. | +| `mcp_servers..startup_timeout_ms` | number | Startup timeout in milliseconds (default: 10_000). Timeout is applied both for initializing MCP server and initially listing tools. | | `model_providers..name` | string | Display name. | | `model_providers..base_url` | string | API base URL. | | `model_providers..env_key` | string | Env var for API key. |