fix: introduce ResponseInputItem::McpToolCallOutput variant (#1151)
The output of an MCP server tool call can be one of several types, but to date, we treated all outputs as text by showing the serialized JSON as the "tool output" in Codex (see `codex-rs/mcp-types/src/lib.rs`, lines 96-101, at revision 25a9949c49).

This PR adds support for the `ImageContent` variant so we can now display an image output from an MCP tool call.

In making this change, we introduce a new `ResponseInputItem::McpToolCallOutput` variant so that we can work with the `mcp_types::CallToolResult` directly when the function call is made to an MCP server.

Though arguably the more significant change is the introduction of `HistoryCell::CompletedMcpToolCallWithImageOutput`, which is a cell that uses `ratatui_image` to render an image into the terminal. To support this, we introduce `ImageRenderCache`, cache a `ratatui_image::picker::Picker`, and add `ensure_image_cache()` to cache the appropriate scaled image data and dimensions based on the current terminal size.

To test, I created a minimal `package.json`:

```json
{
  "name": "kitty-mcp",
  "version": "1.0.0",
  "type": "module",
  "description": "MCP that returns image of kitty",
  "main": "index.js",
  "dependencies": {
    "@modelcontextprotocol/sdk": "^1.12.0"
  }
}
```

with the following `index.js` to define the MCP server:

```js
#!/usr/bin/env node

import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { readFile } from "node:fs/promises";
import { join } from "node:path";

const IMAGE_URI = "image://Ada.png";

const server = new McpServer({
  name: "Demo",
  version: "1.0.0",
});

server.tool(
  "get-cat-image",
  "If you need a cat image, this tool will provide one.",
  async () => ({
    content: [
      { type: "image", data: await getAdaPngBase64(), mimeType: "image/png" },
    ],
  })
);

server.resource("Ada the Cat", IMAGE_URI, async (uri) => {
  const base64Image = await getAdaPngBase64();
  return {
    contents: [
      {
        uri: uri.href,
        mimeType: "image/png",
        blob: base64Image,
      },
    ],
  };
});

async function getAdaPngBase64() {
  const __dirname = new URL(".", import.meta.url).pathname;
  // From 9705ce2c59/assets/Ada.png
  const filePath = join(__dirname, "Ada.png");
  const imageData = await readFile(filePath);
  const base64Image = imageData.toString("base64");
  return base64Image;
}

const transport = new StdioServerTransport();
await server.connect(transport);
```

With the local changes from this PR, I added the following to my `config.toml`:

```toml
[mcp_servers.kitty]
command = "node"
args = ["/Users/mbolin/code/kitty-mcp/index.js"]
```

Running the TUI from source:

```
cargo run --bin codex -- --model o3 'I need a picture of a cat'
```

I get:

<img width="732" alt="image" src="https://github.com/user-attachments/assets/bf80b721-9ca0-4d81-aec7-77d6899e2869" />

Now, that said, I have only tested in iTerm and there is definitely some funny business with getting an accurate character-to-pixel ratio (sometimes the `CompletedMcpToolCallWithImageOutput` thinks it needs 10 rows to render instead of 4), so there is still work to be done here.
This commit is contained in:
@@ -242,11 +242,9 @@ impl EventProcessor {
|
||||
invocation.style(self.bold),
|
||||
);
|
||||
}
|
||||
EventMsg::McpToolCallEnd(McpToolCallEndEvent {
|
||||
call_id,
|
||||
success,
|
||||
result,
|
||||
}) => {
|
||||
EventMsg::McpToolCallEnd(tool_call_end_event) => {
|
||||
let is_success = tool_call_end_event.is_success();
|
||||
let McpToolCallEndEvent { call_id, result } = tool_call_end_event;
|
||||
// Retrieve start time and invocation for duration calculation and labeling.
|
||||
let info = self.call_id_to_tool_call.remove(&call_id);
|
||||
|
||||
@@ -261,13 +259,13 @@ impl EventProcessor {
|
||||
(String::new(), format!("tool('{call_id}')"))
|
||||
};
|
||||
|
||||
let status_str = if success { "success" } else { "failed" };
|
||||
let title_style = if success { self.green } else { self.red };
|
||||
let status_str = if is_success { "success" } else { "failed" };
|
||||
let title_style = if is_success { self.green } else { self.red };
|
||||
let title = format!("{invocation} {status_str}{duration}:");
|
||||
|
||||
ts_println!("{}", title.style(title_style));
|
||||
|
||||
if let Some(res) = result {
|
||||
if let Ok(res) = result {
|
||||
let val: serde_json::Value = res.into();
|
||||
let pretty =
|
||||
serde_json::to_string_pretty(&val).unwrap_or_else(|_| val.to_string());
|
||||
|
||||
Reference in New Issue
Block a user