Add "View Image" tool (#2723)

Adds a "View Image" tool so Codex can find and see images by itself:

<img width="1772" height="420" alt="Screenshot 2025-08-26 at 10 40
04 AM"
src="https://github.com/user-attachments/assets/7a459c7b-0b86-4125-82d9-05fbb35ade03"
/>
This commit is contained in:
dedrisian-oai
2025-08-27 17:41:23 -07:00
committed by GitHub
parent 3e309805ae
commit 4e9ad23864
8 changed files with 126 additions and 13 deletions

View File

@@ -67,6 +67,7 @@ pub(crate) struct ToolsConfig {
pub plan_tool: bool,
pub apply_patch_tool_type: Option<ApplyPatchToolType>,
pub web_search_request: bool,
pub include_view_image_tool: bool,
}
pub(crate) struct ToolsConfigParams<'a> {
@@ -77,6 +78,7 @@ pub(crate) struct ToolsConfigParams<'a> {
pub(crate) include_apply_patch_tool: bool,
pub(crate) include_web_search_request: bool,
pub(crate) use_streamable_shell_tool: bool,
pub(crate) include_view_image_tool: bool,
}
impl ToolsConfig {
@@ -89,6 +91,7 @@ impl ToolsConfig {
include_apply_patch_tool,
include_web_search_request,
use_streamable_shell_tool,
include_view_image_tool,
} = params;
let mut shell_type = if *use_streamable_shell_tool {
ConfigShellToolType::StreamableShell
@@ -120,6 +123,7 @@ impl ToolsConfig {
plan_tool: *include_plan_tool,
apply_patch_tool_type,
web_search_request: *include_web_search_request,
include_view_image_tool: *include_view_image_tool,
}
}
}
@@ -292,6 +296,30 @@ The shell tool is used to execute shell commands.
},
})
}
fn create_view_image_tool() -> OpenAiTool {
// Support only local filesystem path.
let mut properties = BTreeMap::new();
properties.insert(
"path".to_string(),
JsonSchema::String {
description: Some("Local filesystem path to an image file".to_string()),
},
);
OpenAiTool::Function(ResponsesApiTool {
name: "view_image".to_string(),
description:
"Attach a local image (by filesystem path) to the conversation context for this turn."
.to_string(),
strict: false,
parameters: JsonSchema::Object {
properties,
required: Some(vec!["path".to_string()]),
additional_properties: Some(false),
},
})
}
/// TODO(dylan): deprecate once we get rid of json tool
#[derive(Serialize, Deserialize)]
pub(crate) struct ApplyPatchToolArgs {
@@ -541,6 +569,11 @@ pub(crate) fn get_openai_tools(
tools.push(OpenAiTool::WebSearch {});
}
// Include the view_image tool so the agent can attach images to context.
if config.include_view_image_tool {
tools.push(create_view_image_tool());
}
if let Some(mcp_tools) = mcp_tools {
// Ensure deterministic ordering to maximize prompt cache hits.
// HashMap iteration order is non-deterministic, so sort by fully-qualified tool name.
@@ -604,10 +637,14 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
let tools = get_openai_tools(&config, Some(HashMap::new()));
assert_eq_tool_names(&tools, &["local_shell", "update_plan", "web_search"]);
assert_eq_tool_names(
&tools,
&["local_shell", "update_plan", "web_search", "view_image"],
);
}
#[test]
@@ -621,10 +658,14 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
let tools = get_openai_tools(&config, Some(HashMap::new()));
assert_eq_tool_names(&tools, &["shell", "update_plan", "web_search"]);
assert_eq_tool_names(
&tools,
&["shell", "update_plan", "web_search", "view_image"],
);
}
#[test]
@@ -638,6 +679,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
let tools = get_openai_tools(
&config,
@@ -679,11 +721,16 @@ mod tests {
assert_eq_tool_names(
&tools,
&["shell", "web_search", "test_server/do_something_cool"],
&[
"shell",
"web_search",
"view_image",
"test_server/do_something_cool",
],
);
assert_eq!(
tools[2],
tools[3],
OpenAiTool::Function(ResponsesApiTool {
name: "test_server/do_something_cool".to_string(),
parameters: JsonSchema::Object {
@@ -737,6 +784,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: false,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
// Intentionally construct a map with keys that would sort alphabetically.
@@ -794,6 +842,7 @@ mod tests {
&tools,
&[
"shell",
"view_image",
"test_server/cool",
"test_server/do",
"test_server/something",
@@ -812,6 +861,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
let tools = get_openai_tools(
@@ -837,10 +887,13 @@ mod tests {
)])),
);
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/search"]);
assert_eq_tool_names(
&tools,
&["shell", "web_search", "view_image", "dash/search"],
);
assert_eq!(
tools[2],
tools[3],
OpenAiTool::Function(ResponsesApiTool {
name: "dash/search".to_string(),
parameters: JsonSchema::Object {
@@ -870,6 +923,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
let tools = get_openai_tools(
@@ -893,9 +947,12 @@ mod tests {
)])),
);
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/paginate"]);
assert_eq_tool_names(
&tools,
&["shell", "web_search", "view_image", "dash/paginate"],
);
assert_eq!(
tools[2],
tools[3],
OpenAiTool::Function(ResponsesApiTool {
name: "dash/paginate".to_string(),
parameters: JsonSchema::Object {
@@ -923,6 +980,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
let tools = get_openai_tools(
@@ -946,9 +1004,9 @@ mod tests {
)])),
);
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/tags"]);
assert_eq_tool_names(&tools, &["shell", "web_search", "view_image", "dash/tags"]);
assert_eq!(
tools[2],
tools[3],
OpenAiTool::Function(ResponsesApiTool {
name: "dash/tags".to_string(),
parameters: JsonSchema::Object {
@@ -979,6 +1037,7 @@ mod tests {
include_apply_patch_tool: false,
include_web_search_request: true,
use_streamable_shell_tool: false,
include_view_image_tool: true,
});
let tools = get_openai_tools(
@@ -1002,9 +1061,9 @@ mod tests {
)])),
);
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/value"]);
assert_eq_tool_names(&tools, &["shell", "web_search", "view_image", "dash/value"]);
assert_eq!(
tools[2],
tools[3],
OpenAiTool::Function(ResponsesApiTool {
name: "dash/value".to_string(),
parameters: JsonSchema::Object {