[MCP] Render MCP tool call result images to the model (#5600)

It's pretty amazing we have gotten here without the ability for the model to see image content from MCP tool calls. This PR builds off of 4391 and fixes #4819. I would like @KKcorps to get adequete credit here but I also want to get this fix in ASAP so I gave him a week to update it and haven't gotten a response so I'm going to take it across the finish line. This test highlights how absured the current situation is. I asked the model to read this image using the Chrome MCP <img width="2378" height="674" alt="image" src="https://github.com/user-attachments/assets/9ef52608-72a2-4423-9f5e-7ae36b2b56e0" /> After this change, it correctly outputs: > Captured the page: image dhows a dark terminal-style UI labeled `OpenAI Codex (v0.0.0)` with prompt `model: gpt-5-codex medium` and working directory `/codex/codex-rs` (and more) Before this change, it said: > Took the full-page screenshot you asked for. It shows a long, horizontally repeating pattern of stylized people in orange, light-blue, and mustard clothing, holding hands in alternating poses against a white background. No text or other graphics-just rows of flat illustration stretching off to the right. Without this change, the Figma, Playwright, Chrome, and other visual MCP servers are pretty much entirely useless. I tested this change with the openai respones api as well as a third party completions api
2025-10-27 14:55:57 -07:00
parent 67a219ffc2
commit b0bdc04c30
24 changed files with 749 additions and 108 deletions
--- a/codex-rs/core/src/tools/context.rs
+++ b/codex-rs/core/src/tools/context.rs
@@ -5,6 +5,7 @@ use crate::tools::TELEMETRY_PREVIEW_MAX_LINES;
 use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
 use crate::turn_diff_tracker::TurnDiffTracker;
 use codex_otel::otel_event_manager::OtelEventManager;
+use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseInputItem;
 use codex_protocol::models::ShellToolCallParams;
@@ -65,7 +66,10 @@ impl ToolPayload {
 #[derive(Clone)]
 pub enum ToolOutput {
    Function {
+        // Plain text representation of the tool output.
        content: String,
+        // Some tool calls such as MCP calls may return structured content that can get parsed into an array of polymorphic content items.
+        content_items: Option<Vec<FunctionCallOutputContentItem>>,
        success: Option<bool>,
    },
    Mcp {
@@ -90,7 +94,11 @@ impl ToolOutput {

    pub fn into_response(self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem {
        match self {
-            ToolOutput::Function { content, success } => {
+            ToolOutput::Function {
+                content,
+                content_items,
+                success,
+            } => {
                if matches!(payload, ToolPayload::Custom { .. }) {
                    ResponseInputItem::CustomToolCallOutput {
                        call_id: call_id.to_string(),
@@ -99,7 +107,11 @@ impl ToolOutput {
                } else {
                    ResponseInputItem::FunctionCallOutput {
                        call_id: call_id.to_string(),
-                        output: FunctionCallOutputPayload { content, success },
+                        output: FunctionCallOutputPayload {
+                            content,
+                            content_items,
+                            success,
+                        },
                    }
                }
            }
@@ -163,6 +175,7 @@ mod tests {
        };
        let response = ToolOutput::Function {
            content: "patched".to_string(),
+            content_items: None,
            success: Some(true),
        }
        .into_response("call-42", &payload);
@@ -183,6 +196,7 @@ mod tests {
        };
        let response = ToolOutput::Function {
            content: "ok".to_string(),
+            content_items: None,
            success: Some(true),
        }
        .into_response("fn-1", &payload);
@@ -191,6 +205,7 @@ mod tests {
            ResponseInputItem::FunctionCallOutput { call_id, output } => {
                assert_eq!(call_id, "fn-1");
                assert_eq!(output.content, "ok");
+                assert!(output.content_items.is_none());
                assert_eq!(output.success, Some(true));
            }
            other => panic!("expected FunctionCallOutput, got {other:?}"),
--- a/codex-rs/core/src/tools/handlers/apply_patch.rs
+++ b/codex-rs/core/src/tools/handlers/apply_patch.rs
@@ -82,6 +82,7 @@ impl ToolHandler for ApplyPatchHandler {
                        let content = item?;
                        Ok(ToolOutput::Function {
                            content,
+                            content_items: None,
                            success: Some(true),
                        })
                    }
@@ -126,6 +127,7 @@ impl ToolHandler for ApplyPatchHandler {
                        let content = emitter.finish(event_ctx, out).await?;
                        Ok(ToolOutput::Function {
                            content,
+                            content_items: None,
                            success: Some(true),
                        })
                    }
--- a/codex-rs/core/src/tools/handlers/grep_files.rs
+++ b/codex-rs/core/src/tools/handlers/grep_files.rs
@@ -90,11 +90,13 @@ impl ToolHandler for GrepFilesHandler {
        if search_results.is_empty() {
            Ok(ToolOutput::Function {
                content: "No matches found.".to_string(),
+                content_items: None,
                success: Some(false),
            })
        } else {
            Ok(ToolOutput::Function {
                content: search_results.join("\n"),
+                content_items: None,
                success: Some(true),
            })
        }
--- a/codex-rs/core/src/tools/handlers/list_dir.rs
+++ b/codex-rs/core/src/tools/handlers/list_dir.rs
@@ -106,6 +106,7 @@ impl ToolHandler for ListDirHandler {
        output.extend(entries);
        Ok(ToolOutput::Function {
            content: output.join("\n"),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/mcp.rs
+++ b/codex-rs/core/src/tools/handlers/mcp.rs
@@ -56,8 +56,16 @@ impl ToolHandler for McpHandler {
                Ok(ToolOutput::Mcp { result })
            }
            codex_protocol::models::ResponseInputItem::FunctionCallOutput { output, .. } => {
-                let codex_protocol::models::FunctionCallOutputPayload { content, success } = output;
-                Ok(ToolOutput::Function { content, success })
+                let codex_protocol::models::FunctionCallOutputPayload {
+                    content,
+                    content_items,
+                    success,
+                } = output;
+                Ok(ToolOutput::Function {
+                    content,
+                    content_items,
+                    success,
+                })
            }
            _ => Err(FunctionCallError::RespondToModel(
                "mcp handler received unexpected response variant".to_string(),
--- a/codex-rs/core/src/tools/handlers/mcp_resource.rs
+++ b/codex-rs/core/src/tools/handlers/mcp_resource.rs
@@ -297,7 +297,10 @@ async fn handle_list_resources(
    match payload_result {
        Ok(payload) => match serialize_function_output(payload) {
            Ok(output) => {
-                let ToolOutput::Function { content, success } = &output else {
+                let ToolOutput::Function {
+                    content, success, ..
+                } = &output
+                else {
                    unreachable!("MCP resource handler should return function output");
                };
                let duration = start.elapsed();
@@ -403,7 +406,10 @@ async fn handle_list_resource_templates(
    match payload_result {
        Ok(payload) => match serialize_function_output(payload) {
            Ok(output) => {
-                let ToolOutput::Function { content, success } = &output else {
+                let ToolOutput::Function {
+                    content, success, ..
+                } = &output
+                else {
                    unreachable!("MCP resource handler should return function output");
                };
                let duration = start.elapsed();
@@ -489,7 +495,10 @@ async fn handle_read_resource(
    match payload_result {
        Ok(payload) => match serialize_function_output(payload) {
            Ok(output) => {
-                let ToolOutput::Function { content, success } = &output else {
+                let ToolOutput::Function {
+                    content, success, ..
+                } = &output
+                else {
                    unreachable!("MCP resource handler should return function output");
                };
                let duration = start.elapsed();
@@ -618,6 +627,7 @@ where

    Ok(ToolOutput::Function {
        content,
+        content_items: None,
        success: Some(true),
    })
 }
--- a/codex-rs/core/src/tools/handlers/plan.rs
+++ b/codex-rs/core/src/tools/handlers/plan.rs
@@ -88,6 +88,7 @@ impl ToolHandler for PlanHandler {

        Ok(ToolOutput::Function {
            content,
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/read_file.rs
+++ b/codex-rs/core/src/tools/handlers/read_file.rs
@@ -149,6 +149,7 @@ impl ToolHandler for ReadFileHandler {
        };
        Ok(ToolOutput::Function {
            content: collected.join("\n"),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/shell.rs
+++ b/codex-rs/core/src/tools/handlers/shell.rs
@@ -136,6 +136,7 @@ impl ShellHandler {
                        let content = item?;
                        return Ok(ToolOutput::Function {
                            content,
+                            content_items: None,
                            success: Some(true),
                        });
                    }
@@ -179,6 +180,7 @@ impl ShellHandler {
                        let content = emitter.finish(event_ctx, out).await?;
                        return Ok(ToolOutput::Function {
                            content,
+                            content_items: None,
                            success: Some(true),
                        });
                    }
@@ -226,6 +228,7 @@ impl ShellHandler {
        let content = emitter.finish(event_ctx, out).await?;
        Ok(ToolOutput::Function {
            content,
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/test_sync.rs
+++ b/codex-rs/core/src/tools/handlers/test_sync.rs
@@ -95,6 +95,7 @@ impl ToolHandler for TestSyncHandler {

        Ok(ToolOutput::Function {
            content: "ok".to_string(),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/unified_exec.rs
+++ b/codex-rs/core/src/tools/handlers/unified_exec.rs
@@ -171,6 +171,7 @@ impl ToolHandler for UnifiedExecHandler {

        Ok(ToolOutput::Function {
            content,
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/handlers/view_image.rs
+++ b/codex-rs/core/src/tools/handlers/view_image.rs
@@ -85,6 +85,7 @@ impl ToolHandler for ViewImageHandler {

        Ok(ToolOutput::Function {
            content: "attached local image path".to_string(),
+            content_items: None,
            success: Some(true),
        })
    }
--- a/codex-rs/core/src/tools/parallel.rs
+++ b/codex-rs/core/src/tools/parallel.rs
@@ -105,7 +105,7 @@ impl ToolCallRuntime {
                call_id: call.call_id.clone(),
                output: FunctionCallOutputPayload {
                    content: "aborted".to_string(),
-                    success: None,
+                    ..Default::default()
                },
            },
        }
--- a/codex-rs/core/src/tools/router.rs
+++ b/codex-rs/core/src/tools/router.rs
@@ -181,6 +181,7 @@ impl ToolRouter {
                output: codex_protocol::models::FunctionCallOutputPayload {
                    content: message,
                    success: Some(false),
+                    ..Default::default()
                },
            }
        }