[exec] Add MCP tool arguments and results (#5899)

Extends mcp_tool_call item to include arguments and results.
This commit is contained in:
pakrym-oai
2025-10-29 14:23:57 -07:00
committed by GitHub
parent 13e1d0362d
commit 815ae4164a
7 changed files with 784 additions and 39 deletions

View File

@@ -26,6 +26,7 @@ codex-common = { workspace = true, features = [
codex-core = { workspace = true }
codex-ollama = { workspace = true }
codex-protocol = { workspace = true }
mcp-types = { workspace = true }
opentelemetry-appender-tracing = { workspace = true }
owo-colors = { workspace = true }
serde = { workspace = true, features = ["derive"] }

View File

@@ -14,6 +14,8 @@ use crate::exec_events::ItemCompletedEvent;
use crate::exec_events::ItemStartedEvent;
use crate::exec_events::ItemUpdatedEvent;
use crate::exec_events::McpToolCallItem;
use crate::exec_events::McpToolCallItemError;
use crate::exec_events::McpToolCallItemResult;
use crate::exec_events::McpToolCallStatus;
use crate::exec_events::PatchApplyStatus;
use crate::exec_events::PatchChangeKind;
@@ -48,6 +50,7 @@ use codex_core::protocol::TaskStartedEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use serde_json::Value as JsonValue;
use tracing::error;
use tracing::warn;
@@ -81,6 +84,7 @@ struct RunningMcpToolCall {
server: String,
tool: String,
item_id: String,
arguments: JsonValue,
}
impl EventProcessorWithJsonOutput {
@@ -220,6 +224,7 @@ impl EventProcessorWithJsonOutput {
let item_id = self.get_next_item_id();
let server = ev.invocation.server.clone();
let tool = ev.invocation.tool.clone();
let arguments = ev.invocation.arguments.clone().unwrap_or(JsonValue::Null);
self.running_mcp_tool_calls.insert(
ev.call_id.clone(),
@@ -227,6 +232,7 @@ impl EventProcessorWithJsonOutput {
server: server.clone(),
tool: tool.clone(),
item_id: item_id.clone(),
arguments: arguments.clone(),
},
);
@@ -235,6 +241,9 @@ impl EventProcessorWithJsonOutput {
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server,
tool,
arguments,
result: None,
error: None,
status: McpToolCallStatus::InProgress,
}),
};
@@ -249,19 +258,42 @@ impl EventProcessorWithJsonOutput {
McpToolCallStatus::Failed
};
let (server, tool, item_id) = match self.running_mcp_tool_calls.remove(&ev.call_id) {
Some(running) => (running.server, running.tool, running.item_id),
None => {
warn!(
call_id = ev.call_id,
"Received McpToolCallEnd without begin; synthesizing new item"
);
(
ev.invocation.server.clone(),
ev.invocation.tool.clone(),
self.get_next_item_id(),
)
let (server, tool, item_id, arguments) =
match self.running_mcp_tool_calls.remove(&ev.call_id) {
Some(running) => (
running.server,
running.tool,
running.item_id,
running.arguments,
),
None => {
warn!(
call_id = ev.call_id,
"Received McpToolCallEnd without begin; synthesizing new item"
);
(
ev.invocation.server.clone(),
ev.invocation.tool.clone(),
self.get_next_item_id(),
ev.invocation.arguments.clone().unwrap_or(JsonValue::Null),
)
}
};
let (result, error) = match &ev.result {
Ok(value) => {
let result = McpToolCallItemResult {
content: value.content.clone(),
structured_content: value.structured_content.clone(),
};
(Some(result), None)
}
Err(message) => (
None,
Some(McpToolCallItemError {
message: message.clone(),
}),
),
};
let item = ThreadItem {
@@ -269,6 +301,9 @@ impl EventProcessorWithJsonOutput {
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server,
tool,
arguments,
result,
error,
status,
}),
};

View File

@@ -1,9 +1,11 @@
use mcp_types::ContentBlock as McpContentBlock;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value as JsonValue;
use ts_rs::TS;
/// Top-level JSONL events emitted by codex exec
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(tag = "type")]
pub enum ThreadEvent {
/// Emitted when a new thread is started as the first event.
@@ -33,28 +35,28 @@ pub enum ThreadEvent {
Error(ThreadErrorEvent),
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ThreadStartedEvent {
/// The identifier of the new thread. Can be used to resume the thread later.
pub thread_id: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS, Default)]
pub struct TurnStartedEvent {}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TurnCompletedEvent {
pub usage: Usage,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TurnFailedEvent {
pub error: ThreadErrorEvent,
}
/// Describes the usage of tokens during a turn.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS, Default)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS, Default)]
pub struct Usage {
/// The number of input tokens used during the turn.
pub input_tokens: i64,
@@ -64,29 +66,29 @@ pub struct Usage {
pub output_tokens: i64,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ItemStartedEvent {
pub item: ThreadItem,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ItemCompletedEvent {
pub item: ThreadItem,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ItemUpdatedEvent {
pub item: ThreadItem,
}
/// Fatal error emitted by the stream.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ThreadErrorEvent {
pub message: String,
}
/// Canonical representation of a thread item and its domain-specific payload.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ThreadItem {
pub id: String,
#[serde(flatten)]
@@ -94,7 +96,7 @@ pub struct ThreadItem {
}
/// Typed payloads for each supported thread item type.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum ThreadItemDetails {
/// Response from the agent.
@@ -123,13 +125,13 @@ pub enum ThreadItemDetails {
/// Response from the agent.
/// Either a natural-language response or a JSON string when structured output is requested.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct AgentMessageItem {
pub text: String,
}
/// Agent's reasoning summary.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ReasoningItem {
pub text: String,
}
@@ -145,24 +147,23 @@ pub enum CommandExecutionStatus {
}
/// A command executed by the agent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct CommandExecutionItem {
pub command: String,
pub aggregated_output: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub exit_code: Option<i32>,
pub status: CommandExecutionStatus,
}
/// A single file change made by the agent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct FileUpdateChange {
pub path: String,
pub kind: PatchChangeKind,
}
/// The status of a file change.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(rename_all = "snake_case")]
pub enum PatchApplyStatus {
Completed,
@@ -170,14 +171,14 @@ pub enum PatchApplyStatus {
}
/// A set of file changes by the agent.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct FileChangeItem {
pub changes: Vec<FileUpdateChange>,
pub status: PatchApplyStatus,
}
/// Indicates the type of the file change.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
#[serde(rename_all = "snake_case")]
pub enum PatchChangeKind {
Add,
@@ -195,34 +196,51 @@ pub enum McpToolCallStatus {
Failed,
}
/// Result payload produced by an MCP tool invocation.
///
/// Populated from the successful (`Ok`) result of a tool call end event.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct McpToolCallItemResult {
/// Content blocks returned by the tool call.
pub content: Vec<McpContentBlock>,
/// Structured JSON content returned alongside `content`, if the tool supplied any.
pub structured_content: Option<JsonValue>,
}
/// Error details reported by a failed MCP tool invocation.
///
/// Populated from the `Err` variant of a tool call end event's result.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct McpToolCallItemError {
/// Error message describing why the call failed.
pub message: String,
}
/// A call to an MCP tool.
///
/// Emitted with `result` and `error` both `None` while the call is in progress;
/// on completion exactly one of them is populated depending on the outcome.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct McpToolCallItem {
/// The MCP server the call was sent to.
pub server: String,
/// The tool invoked on the MCP server.
pub tool: String,
/// Arguments forwarded to the tool invocation.
/// `#[serde(default)]` lets deserialization succeed when the field is absent.
#[serde(default)]
pub arguments: JsonValue,
/// Result payload for successful calls; `None` otherwise.
pub result: Option<McpToolCallItemResult>,
/// Error details for failed calls; `None` otherwise.
pub error: Option<McpToolCallItemError>,
/// Current status of the tool invocation.
pub status: McpToolCallStatus,
}
/// A web search request.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct WebSearchItem {
pub query: String,
}
/// An error notification.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct ErrorItem {
pub message: String,
}
/// An item in agent's to-do list.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TodoItem {
pub text: String,
pub completed: bool,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, TS)]
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TS)]
pub struct TodoListItem {
pub items: Vec<TodoItem>,
}

View File

@@ -21,6 +21,8 @@ use codex_exec::exec_events::ItemCompletedEvent;
use codex_exec::exec_events::ItemStartedEvent;
use codex_exec::exec_events::ItemUpdatedEvent;
use codex_exec::exec_events::McpToolCallItem;
use codex_exec::exec_events::McpToolCallItemError;
use codex_exec::exec_events::McpToolCallItemResult;
use codex_exec::exec_events::McpToolCallStatus;
use codex_exec::exec_events::PatchApplyStatus;
use codex_exec::exec_events::PatchChangeKind;
@@ -41,7 +43,10 @@ use codex_protocol::plan_tool::PlanItemArg;
use codex_protocol::plan_tool::StepStatus;
use codex_protocol::plan_tool::UpdatePlanArgs;
use mcp_types::CallToolResult;
use mcp_types::ContentBlock;
use mcp_types::TextContent;
use pretty_assertions::assert_eq;
use serde_json::json;
use std::path::PathBuf;
use std::time::Duration;
@@ -239,7 +244,7 @@ fn mcp_tool_call_begin_and_end_emit_item_events() {
let invocation = McpInvocation {
server: "server_a".to_string(),
tool: "tool_x".to_string(),
arguments: None,
arguments: Some(json!({ "key": "value" })),
};
let begin = event(
@@ -258,6 +263,9 @@ fn mcp_tool_call_begin_and_end_emit_item_events() {
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_a".to_string(),
tool: "tool_x".to_string(),
arguments: json!({ "key": "value" }),
result: None,
error: None,
status: McpToolCallStatus::InProgress,
}),
},
@@ -286,6 +294,12 @@ fn mcp_tool_call_begin_and_end_emit_item_events() {
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_a".to_string(),
tool: "tool_x".to_string(),
arguments: json!({ "key": "value" }),
result: Some(McpToolCallItemResult {
content: Vec::new(),
structured_content: None,
}),
error: None,
status: McpToolCallStatus::Completed,
}),
},
@@ -299,7 +313,7 @@ fn mcp_tool_call_failure_sets_failed_status() {
let invocation = McpInvocation {
server: "server_b".to_string(),
tool: "tool_y".to_string(),
arguments: None,
arguments: Some(json!({ "param": 42 })),
};
let begin = event(
@@ -329,6 +343,11 @@ fn mcp_tool_call_failure_sets_failed_status() {
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_b".to_string(),
tool: "tool_y".to_string(),
arguments: json!({ "param": 42 }),
result: None,
error: Some(McpToolCallItemError {
message: "tool exploded".to_string(),
}),
status: McpToolCallStatus::Failed,
}),
},
@@ -336,6 +355,83 @@ fn mcp_tool_call_failure_sets_failed_status() {
);
}
/// Verifies two behaviors of MCP tool call items:
/// 1. an invocation without arguments is reported with `arguments` set to JSON null;
/// 2. the content blocks and structured content of a successful result are carried
///    through unchanged into the completed item.
#[test]
fn mcp_tool_call_defaults_arguments_and_preserves_structured_content() {
let mut ep = EventProcessorWithJsonOutput::new(None);
// No arguments supplied: the processor is expected to substitute JSON null.
let invocation = McpInvocation {
server: "server_c".to_string(),
tool: "tool_z".to_string(),
arguments: None,
};
let begin = event(
"m5",
EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
call_id: "call-3".to_string(),
invocation: invocation.clone(),
}),
);
let begin_events = ep.collect_thread_events(&begin);
// The begin event yields an in-progress item with no result or error yet.
assert_eq!(
begin_events,
vec![ThreadEvent::ItemStarted(ItemStartedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_c".to_string(),
tool: "tool_z".to_string(),
arguments: serde_json::Value::Null,
result: None,
error: None,
status: McpToolCallStatus::InProgress,
}),
},
})]
);
// End event for the same call id, carrying a text block plus structured content.
let end = event(
"m6",
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
call_id: "call-3".to_string(),
invocation,
duration: Duration::from_millis(10),
result: Ok(CallToolResult {
content: vec![ContentBlock::TextContent(TextContent {
annotations: None,
text: "done".to_string(),
r#type: "text".to_string(),
})],
is_error: None,
structured_content: Some(json!({ "status": "ok" })),
}),
}),
);
let events = ep.collect_thread_events(&end);
// The completed item reuses the id from the begin event ("item_0") and mirrors
// the result's content and structured content exactly.
assert_eq!(
events,
vec![ThreadEvent::ItemCompleted(ItemCompletedEvent {
item: ThreadItem {
id: "item_0".to_string(),
details: ThreadItemDetails::McpToolCall(McpToolCallItem {
server: "server_c".to_string(),
tool: "tool_z".to_string(),
arguments: serde_json::Value::Null,
result: Some(McpToolCallItemResult {
content: vec![ContentBlock::TextContent(TextContent {
annotations: None,
text: "done".to_string(),
r#type: "text".to_string(),
})],
structured_content: Some(json!({ "status": "ok" })),
}),
error: None,
status: McpToolCallStatus::Completed,
}),
},
})]
);
}
#[test]
fn plan_update_after_complete_starts_new_todo_list_with_new_id() {
let mut ep = EventProcessorWithJsonOutput::new(None);

581
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -45,6 +45,7 @@
"prepare": "pnpm run build"
},
"devDependencies": {
"@modelcontextprotocol/sdk": "^1.20.2",
"@types/jest": "^29.5.14",
"@types/node": "^20.19.18",
"eslint": "^9.36.0",

View File

@@ -1,5 +1,7 @@
// based on item types from codex-rs/exec/src/exec_events.rs
import type { ContentBlock as McpContentBlock } from "@modelcontextprotocol/sdk/types.js";
/** The status of a command execution. */
export type CommandExecutionStatus = "in_progress" | "completed" | "failed";
@@ -53,6 +55,17 @@ export type McpToolCallItem = {
server: string;
/** The tool invoked on the MCP server. */
tool: string;
/** Arguments forwarded to the tool invocation. */
arguments: unknown;
/** Result payload returned by the MCP server for successful calls. */
result?: {
content: McpContentBlock[];
structured_content: unknown;
};
/** Error message reported for failed calls. */
error?: {
message: string;
};
/** Current status of the tool invocation. */
status: McpToolCallStatus;
};