[MCP] Render MCP tool call result images to the model (#5600)
It's pretty amazing we have gotten here without the ability for the model to see image content from MCP tool calls. This PR builds off of 4391 and fixes #4819. I would like @KKcorps to get adequate credit here, but I also want to get this fix in ASAP, so I gave him a week to update it; I haven't gotten a response, so I'm going to take it across the finish line. This test highlights how absurd the current situation is. I asked the model to read this image using the Chrome MCP <img width="2378" height="674" alt="image" src="https://github.com/user-attachments/assets/9ef52608-72a2-4423-9f5e-7ae36b2b56e0" /> After this change, it correctly outputs: > Captured the page: image shows a dark terminal-style UI labeled `OpenAI Codex (v0.0.0)` with prompt `model: gpt-5-codex medium` and working directory `/codex/codex-rs` (and more) Before this change, it said: > Took the full-page screenshot you asked for. It shows a long, horizontally repeating pattern of stylized people in orange, light-blue, and mustard clothing, holding hands in alternating poses against a white background. No text or other graphics—just rows of flat illustration stretching off to the right. Without this change, the Figma, Playwright, Chrome, and other visual MCP servers are pretty much entirely useless. I tested this change with the OpenAI Responses API as well as a third-party completions API.
This commit is contained in:
@@ -40,7 +40,7 @@ pub fn stdio() -> (tokio::io::Stdin, tokio::io::Stdout) {
|
||||
}
|
||||
impl TestToolServer {
|
||||
fn new() -> Self {
|
||||
let tools = vec![Self::echo_tool()];
|
||||
let tools = vec![Self::echo_tool(), Self::image_tool()];
|
||||
let resources = vec![Self::memo_resource()];
|
||||
let resource_templates = vec![Self::memo_template()];
|
||||
Self {
|
||||
@@ -70,6 +70,22 @@ impl TestToolServer {
|
||||
)
|
||||
}
|
||||
|
||||
fn image_tool() -> Tool {
|
||||
#[expect(clippy::expect_used)]
|
||||
let schema: JsonObject = serde_json::from_value(serde_json::json!({
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"additionalProperties": false
|
||||
}))
|
||||
.expect("image tool schema should deserialize");
|
||||
|
||||
Tool::new(
|
||||
Cow::Borrowed("image"),
|
||||
Cow::Borrowed("Return a single image content block."),
|
||||
Arc::new(schema),
|
||||
)
|
||||
}
|
||||
|
||||
fn memo_resource() -> Resource {
|
||||
let raw = RawResource {
|
||||
uri: MEMO_URI.to_string(),
|
||||
@@ -214,6 +230,35 @@ impl ServerHandler for TestToolServer {
|
||||
meta: None,
|
||||
})
|
||||
}
|
||||
"image" => {
|
||||
// Read a data URL (e.g. data:image/png;base64,AAA...) from env and convert to
|
||||
// an MCP image content block. Tests set MCP_TEST_IMAGE_DATA_URL.
|
||||
let data_url = std::env::var("MCP_TEST_IMAGE_DATA_URL").map_err(|_| {
|
||||
McpError::invalid_params(
|
||||
"missing MCP_TEST_IMAGE_DATA_URL env var for image tool",
|
||||
None,
|
||||
)
|
||||
})?;
|
||||
|
||||
/// Splits a base64 `data:` URL into `(mime_type, base64_payload)`.
///
/// Accepts URLs of the form `data:<mime>[;param]*;base64,<payload>`.
///
/// Returns `None` when:
/// - the `data:` prefix or the `,` separator is missing,
/// - the MIME type is empty (an image block needs an explicit type), or
/// - the header does not declare `base64` encoding. The payload is returned
///   verbatim and is intended to be used as base64 data, so a
///   percent-encoded payload would otherwise be mislabelled.
///
/// The payload itself is not decoded or validated.
fn parse_data_url(url: &str) -> Option<(String, String)> {
    let rest = url.strip_prefix("data:")?;
    let (header, payload) = rest.split_once(',')?;

    // The header is `<mime>` followed by zero or more `;`-separated parameters.
    let mut parts = header.split(';');
    let mime = parts.next().filter(|m| !m.is_empty())?;

    // Only base64-encoded payloads can be passed through unchanged.
    if !parts.any(|param| param.eq_ignore_ascii_case("base64")) {
        return None;
    }

    Some((mime.to_string(), payload.to_string()))
}
|
||||
|
||||
let (mime_type, data_b64) = parse_data_url(&data_url).ok_or_else(|| {
|
||||
McpError::invalid_params(
|
||||
format!("invalid data URL for image tool: {data_url}"),
|
||||
None,
|
||||
)
|
||||
})?;
|
||||
|
||||
Ok(CallToolResult::success(vec![rmcp::model::Content::image(
|
||||
data_b64, mime_type,
|
||||
)]))
|
||||
}
|
||||
other => Err(McpError::invalid_params(
|
||||
format!("unknown tool: {other}"),
|
||||
None,
|
||||
|
||||
Reference in New Issue
Block a user