[exec] Add MCP tool arguments and results (#5899)

Extends mcp_tool_call item to include arguments and results.
This commit is contained in:
pakrym-oai
2025-10-29 14:23:57 -07:00
committed by GitHub
parent 13e1d0362d
commit 815ae4164a
7 changed files with 784 additions and 39 deletions

View File

@@ -26,6 +26,7 @@ codex-common = { workspace = true, features = [
codex-core = { workspace = true } codex-core = { workspace = true }
codex-ollama = { workspace = true } codex-ollama = { workspace = true }
codex-protocol = { workspace = true } codex-protocol = { workspace = true }
mcp-types = { workspace = true }
opentelemetry-appender-tracing = { workspace = true } opentelemetry-appender-tracing = { workspace = true }
owo-colors = { workspace = true } owo-colors = { workspace = true }
serde = { workspace = true, features = ["derive"] } serde = { workspace = true, features = ["derive"] }

View File

@@ -14,6 +14,8 @@ use crate::exec_events::ItemCompletedEvent;
use crate::exec_events::ItemStartedEvent; use crate::exec_events::ItemStartedEvent;
use crate::exec_events::ItemUpdatedEvent; use crate::exec_events::ItemUpdatedEvent;
use crate::exec_events::McpToolCallItem; use crate::exec_events::McpToolCallItem;
use crate::exec_events::McpToolCallItemError;
use crate::exec_events::McpToolCallItemResult;
use crate::exec_events::McpToolCallStatus; use crate::exec_events::McpToolCallStatus;
use crate::exec_events::PatchApplyStatus; use crate::exec_events::PatchApplyStatus;
use crate::exec_events::PatchChangeKind; use crate::exec_events::PatchChangeKind;
@@ -48,6 +50,7 @@ use codex_core::protocol::TaskStartedEvent;
use codex_core::protocol::WebSearchEndEvent; use codex_core::protocol::WebSearchEndEvent;
use codex_protocol::plan_tool::StepStatus; use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs; use codex_protocol::plan_tool::UpdatePlanArgs;
use serde_json::Value as JsonValue;
use tracing::error; use tracing::error;
use tracing::warn; use tracing::warn;
@@ -81,6 +84,7 @@ struct RunningMcpToolCall {
server: String, server: String,
tool: String, tool: String,
item_id: String, item_id: String,
arguments: JsonValue,
} }
impl EventProcessorWithJsonOutput { impl EventProcessorWithJsonOutput {
@@ -220,6 +224,7 @@ impl EventProcessorWithJsonOutput {
let item_id = self.get_next_item_id(); let item_id = self.get_next_item_id();
let server = ev.invocation.server.clone(); let server = ev.invocation.server.clone();
let tool = ev.invocation.tool.clone(); let tool = ev.invocation.tool.clone();
let arguments = ev.invocation.arguments.clone().unwrap_or(JsonValue::Null);
self.running_mcp_tool_calls.insert( self.running_mcp_tool_calls.insert(
ev.call_id.clone(), ev.call_id.clone(),
@@ -227,6 +232,7 @@ impl EventProcessorWithJsonOutput {
server: server.clone(), server: server.clone(),
tool: tool.clone(), tool: tool.clone(),
item_id: item_id.clone(), item_id: item_id.clone(),
arguments: arguments.clone(),
}, },
); );
@@ -235,6 +241,9 @@ impl EventProcessorWithJsonOutput {
details: ThreadItemDetails::McpToolCall(McpToolCallItem { details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server, server,
tool, tool,
arguments,
result: None,
error: None,
status: McpToolCallStatus::InProgress, status: McpToolCallStatus::InProgress,
}), }),
}; };
@@ -249,19 +258,42 @@ impl EventProcessorWithJsonOutput {
McpToolCallStatus::Failed McpToolCallStatus::Failed
}; };
let (server, tool, item_id) = match self.running_mcp_tool_calls.remove(&ev.call_id) { let (server, tool, item_id, arguments) =
Some(running) => (running.server, running.tool, running.item_id), match self.running_mcp_tool_calls.remove(&ev.call_id) {
None => { Some(running) => (
warn!( running.server,
call_id = ev.call_id, running.tool,
"Received McpToolCallEnd without begin; synthesizing new item" running.item_id,
); running.arguments,
( ),
ev.invocation.server.clone(), None => {
ev.invocation.tool.clone(), warn!(
self.get_next_item_id(), call_id = ev.call_id,
) "Received McpToolCallEnd without begin; synthesizing new item"
);
(
ev.invocation.server.clone(),
ev.invocation.tool.clone(),
self.get_next_item_id(),
ev.invocation.arguments.clone().unwrap_or(JsonValue::Null),
)
}
};
let (result, error) = match &ev.result {
Ok(value) => {
let result = McpToolCallItemResult {
content: value.content.clone(),
structured_content: value.structured_content.clone(),
};
(Some(result), None)
} }
Err(message) => (
None,
Some(McpToolCallItemError {
message: message.clone(),
}),
),
}; };
let item = ThreadItem { let item = ThreadItem {
@@ -269,6 +301,9 @@ impl EventProcessorWithJsonOutput {
details: ThreadItemDetails::McpToolCall(McpToolCallItem { details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server, server,
tool, tool,
arguments,
result,
error,
status, status,
}), }),
}; };

View File

@@ -1,9 +1,11 @@
use mcp_types::ContentBlock as McpContentBlock;
use serde::Deserialize; use serde::Deserialize;
use serde::Serialize; use serde::Serialize;
use serde_json::Value as JsonValue;
use ts_rs::TS; use ts_rs::TS;
/// Top-level JSONL events emitted by codex exec /// Top-level JSONL events emitted by codex exec
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(tag = "type")] #[serde(tag = "type")]
pub enum ThreadEvent { pub enum ThreadEvent {
/// Emitted when a new thread is started as the first event. /// Emitted when a new thread is started as the first event.
@@ -33,28 +35,28 @@ pub enum ThreadEvent {
Error(ThreadErrorEvent), Error(ThreadErrorEvent),
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ThreadStartedEvent { pub struct ThreadStartedEvent {
/// The identifier of the new thread. Can be used to resume the thread later. /// The identifier of the new thread. Can be used to resume the thread later.
pub thread_id: String, pub thread_id: String,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS, Default)]
pub struct TurnStartedEvent {} pub struct TurnStartedEvent {}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TurnCompletedEvent { pub struct TurnCompletedEvent {
pub usage: Usage, pub usage: Usage,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TurnFailedEvent { pub struct TurnFailedEvent {
pub error: ThreadErrorEvent, pub error: ThreadErrorEvent,
} }
/// Describes the usage of tokens during a turn. /// Describes the usage of tokens during a turn.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS, Default)]
pub struct Usage { pub struct Usage {
/// The number of input tokens used during the turn. /// The number of input tokens used during the turn.
pub input_tokens: i64, pub input_tokens: i64,
@@ -64,29 +66,29 @@ pub struct Usage {
pub output_tokens: i64, pub output_tokens: i64,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ItemStartedEvent { pub struct ItemStartedEvent {
pub item: ThreadItem, pub item: ThreadItem,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ItemCompletedEvent { pub struct ItemCompletedEvent {
pub item: ThreadItem, pub item: ThreadItem,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ItemUpdatedEvent { pub struct ItemUpdatedEvent {
pub item: ThreadItem, pub item: ThreadItem,
} }
/// Fatal error emitted by the stream. /// Fatal error emitted by the stream.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ThreadErrorEvent { pub struct ThreadErrorEvent {
pub message: String, pub message: String,
} }
/// Canonical representation of a thread item and its domain-specific payload. /// Canonical representation of a thread item and its domain-specific payload.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ThreadItem { pub struct ThreadItem {
pub id: String, pub id: String,
#[serde(flatten)] #[serde(flatten)]
@@ -94,7 +96,7 @@ pub struct ThreadItem {
} }
/// Typed payloads for each supported thread item type. /// Typed payloads for each supported thread item type.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum ThreadItemDetails { pub enum ThreadItemDetails {
/// Response from the agent. /// Response from the agent.
@@ -123,13 +125,13 @@ pub enum ThreadItemDetails {
/// Response from the agent. /// Response from the agent.
/// Either a natural-language response or a JSON string when structured output is requested. /// Either a natural-language response or a JSON string when structured output is requested.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct AgentMessageItem { pub struct AgentMessageItem {
pub text: String, pub text: String,
} }
/// Agent's reasoning summary. /// Agent's reasoning summary.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ReasoningItem { pub struct ReasoningItem {
pub text: String, pub text: String,
} }
@@ -145,24 +147,23 @@ pub enum CommandExecutionStatus {
} }
/// A command executed by the agent. /// A command executed by the agent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct CommandExecutionItem { pub struct CommandExecutionItem {
pub command: String, pub command: String,
pub aggregated_output: String, pub aggregated_output: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub exit_code: Option<i32>, pub exit_code: Option<i32>,
pub status: CommandExecutionStatus, pub status: CommandExecutionStatus,
} }
/// A set of file changes by the agent. /// A set of file changes by the agent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct FileUpdateChange { pub struct FileUpdateChange {
pub path: String, pub path: String,
pub kind: PatchChangeKind, pub kind: PatchChangeKind,
} }
/// The status of a file change. /// The status of a file change.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum PatchApplyStatus { pub enum PatchApplyStatus {
Completed, Completed,
@@ -170,14 +171,14 @@ pub enum PatchApplyStatus {
} }
/// A set of file changes by the agent. /// A set of file changes by the agent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct FileChangeItem { pub struct FileChangeItem {
pub changes: Vec<FileUpdateChange>, pub changes: Vec<FileUpdateChange>,
pub status: PatchApplyStatus, pub status: PatchApplyStatus,
} }
/// Indicates the type of the file change. /// Indicates the type of the file change.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum PatchChangeKind { pub enum PatchChangeKind {
Add, Add,
@@ -195,34 +196,51 @@ pub enum McpToolCallStatus {
Failed, Failed,
} }
/// Result payload produced by an MCP tool invocation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct McpToolCallItemResult {
pub content: Vec<McpContentBlock>,
pub structured_content: Option<JsonValue>,
}
/// Error details reported by a failed MCP tool invocation.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct McpToolCallItemError {
pub message: String,
}
/// A call to an MCP tool. /// A call to an MCP tool.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct McpToolCallItem { pub struct McpToolCallItem {
pub server: String, pub server: String,
pub tool: String, pub tool: String,
#[serde(default)]
pub arguments: JsonValue,
pub result: Option<McpToolCallItemResult>,
pub error: Option<McpToolCallItemError>,
pub status: McpToolCallStatus, pub status: McpToolCallStatus,
} }
/// A web search request. /// A web search request.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct WebSearchItem { pub struct WebSearchItem {
pub query: String, pub query: String,
} }
/// An error notification. /// An error notification.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ErrorItem { pub struct ErrorItem {
pub message: String, pub message: String,
} }
/// An item in agent's to-do list. /// An item in agent's to-do list.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TodoItem { pub struct TodoItem {
pub text: String, pub text: String,
pub completed: bool, pub completed: bool,
} }
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)] #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TodoListItem { pub struct TodoListItem {
pub items: Vec<TodoItem>, pub items: Vec<TodoItem>,
} }

View File

@@ -21,6 +21,8 @@ use codex_exec::exec_events::ItemCompletedEvent;
use codex_exec::exec_events::ItemStartedEvent; use codex_exec::exec_events::ItemStartedEvent;
use codex_exec::exec_events::ItemUpdatedEvent; use codex_exec::exec_events::ItemUpdatedEvent;
use codex_exec::exec_events::McpToolCallItem; use codex_exec::exec_events::McpToolCallItem;
use codex_exec::exec_events::McpToolCallItemError;
use codex_exec::exec_events::McpToolCallItemResult;
use codex_exec::exec_events::McpToolCallStatus; use codex_exec::exec_events::McpToolCallStatus;
use codex_exec::exec_events::PatchApplyStatus; use codex_exec::exec_events::PatchApplyStatus;
use codex_exec::exec_events::PatchChangeKind; use codex_exec::exec_events::PatchChangeKind;
@@ -41,7 +43,10 @@ use codex_protocol::plan_tool::PlanItemArg;
use codex_protocol::plan_tool::StepStatus; use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs; use codex_protocol::plan_tool::UpdatePlanArgs;
use mcp_types::CallToolResult; use mcp_types::CallToolResult;
use mcp_types::ContentBlock;
use mcp_types::TextContent;
use pretty_assertions::assert_eq; use pretty_assertions::assert_eq;
use serde_json::json;
use std::path::PathBuf; use std::path::PathBuf;
use std::time::Duration; use std::time::Duration;
@@ -239,7 +244,7 @@ fn mcp_tool_call_begin_and_end_emit_item_events() {
let invocation = McpInvocation { let invocation = McpInvocation {
server: "server_a".to_string(), server: "server_a".to_string(),
tool: "tool_x".to_string(), tool: "tool_x".to_string(),
arguments: None, arguments: Some(json!({ "key": "value" })),
}; };
let begin = event( let begin = event(
@@ -258,6 +263,9 @@ fn mcp_tool_call_begin_and_end_emit_item_events() {
details: ThreadItemDetails::McpToolCall(McpToolCallItem { details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_a".to_string(), server: "server_a".to_string(),
tool: "tool_x".to_string(), tool: "tool_x".to_string(),
arguments: json!({ "key": "value" }),
result: None,
error: None,
status: McpToolCallStatus::InProgress, status: McpToolCallStatus::InProgress,
}), }),
}, },
@@ -286,6 +294,12 @@ fn mcp_tool_call_begin_and_end_emit_item_events() {
details: ThreadItemDetails::McpToolCall(McpToolCallItem { details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_a".to_string(), server: "server_a".to_string(),
tool: "tool_x".to_string(), tool: "tool_x".to_string(),
arguments: json!({ "key": "value" }),
result: Some(McpToolCallItemResult {
content: Vec::new(),
structured_content: None,
}),
error: None,
status: McpToolCallStatus::Completed, status: McpToolCallStatus::Completed,
}), }),
}, },
@@ -299,7 +313,7 @@ fn mcp_tool_call_failure_sets_failed_status() {
let invocation = McpInvocation { let invocation = McpInvocation {
server: "server_b".to_string(), server: "server_b".to_string(),
tool: "tool_y".to_string(), tool: "tool_y".to_string(),
arguments: None, arguments: Some(json!({ "param": 42 })),
}; };
let begin = event( let begin = event(
@@ -329,6 +343,11 @@ fn mcp_tool_call_failure_sets_failed_status() {
details: ThreadItemDetails::McpToolCall(McpToolCallItem { details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_b".to_string(), server: "server_b".to_string(),
tool: "tool_y".to_string(), tool: "tool_y".to_string(),
arguments: json!({ "param": 42 }),
result: None,
error: Some(McpToolCallItemError {
message: "tool exploded".to_string(),
}),
status: McpToolCallStatus::Failed, status: McpToolCallStatus::Failed,
}), }),
}, },
@@ -336,6 +355,83 @@ fn mcp_tool_call_failure_sets_failed_status() {
); );
} }
#[test]
fn mcp_tool_call_defaults_arguments_and_preserves_structured_content() {
let mut ep = EventProcessorWithJsonOutput::new(None);
let invocation = McpInvocation {
server: "server_c".to_string(),
tool: "tool_z".to_string(),
arguments: None,
};
let begin = event(
"m5",
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
call_id: "call-3".to_string(),
invocation: invocation.clone(),
}),
);
let begin_events = ep.collect_thread_events(&begin);
assert_eq!(
begin_events,
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_c".to_string(),
tool: "tool_z".to_string(),
arguments: serde_json::Value::Null,
result: None,
error: None,
status: McpToolCallStatus::InProgress,
}),
},
})]
);
let end = event(
"m6",
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
call_id: "call-3".to_string(),
invocation,
duration: Duration::from_millis(10),
result: Ok(CallToolResult {
content: vec![ContentBlock::TextContent(TextContent {
annotations: None,
text: "done".to_string(),
r#type: "text".to_string(),
})],
is_error: None,
structured_content: Some(json!({ "status": "ok" })),
}),
}),
);
let events = ep.collect_thread_events(&end);
assert_eq!(
events,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_c".to_string(),
tool: "tool_z".to_string(),
arguments: serde_json::Value::Null,
result: Some(McpToolCallItemResult {
content: vec![ContentBlock::TextContent(TextContent {
annotations: None,
text: "done".to_string(),
r#type: "text".to_string(),
})],
structured_content: Some(json!({ "status": "ok" })),
}),
error: None,
status: McpToolCallStatus::Completed,
}),
},
})]
);
}
#[test] #[test]
fn plan_update_after_complete_starts_new_todo_list_with_new_id() { fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
let mut ep = EventProcessorWithJsonOutput::new(None); let mut ep = EventProcessorWithJsonOutput::new(None);

581
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -45,6 +45,7 @@
"prepare": "pnpm run build" "prepare": "pnpm run build"
}, },
"devDependencies": { "devDependencies": {
"@modelcontextprotocol/sdk": "^1.20.2",
"@types/jest": "^29.5.14", "@types/jest": "^29.5.14",
"@types/node": "^20.19.18", "@types/node": "^20.19.18",
"eslint": "^9.36.0", "eslint": "^9.36.0",

View File

@@ -1,5 +1,7 @@
// based on item types from codex-rs/exec/src/exec_events.rs // based on item types from codex-rs/exec/src/exec_events.rs
import type { ContentBlock as McpContentBlock } from "@modelcontextprotocol/sdk/types.js";
/** The status of a command execution. */ /** The status of a command execution. */
export type CommandExecutionStatus = "in_progress" | "completed" | "failed"; export type CommandExecutionStatus = "in_progress" | "completed" | "failed";
@@ -53,6 +55,17 @@ export type McpToolCallItem = {
server: string; server: string;
/** The tool invoked on the MCP server. */ /** The tool invoked on the MCP server. */
tool: string; tool: string;
/** Arguments forwarded to the tool invocation. */
arguments: unknown;
/** Result payload returned by the MCP server for successful calls. */
result?: {
content: McpContentBlock[];
structured_content: unknown;
};
/** Error message reported for failed calls. */
error?: {
message: string;
};
/** Current status of the tool invocation. */ /** Current status of the tool invocation. */
status: McpToolCallStatus; status: McpToolCallStatus;
}; };