Add "View Image" tool (#2723)
Adds a "View Image" tool so Codex can find and see images by itself: <img width="1772" height="420" alt="Screenshot 2025-08-26 at 10 40 04 AM" src="https://github.com/user-attachments/assets/7a459c7b-0b86-4125-82d9-05fbb35ade03" />
This commit is contained in:
@@ -67,6 +67,7 @@ pub(crate) struct ToolsConfig {
|
||||
pub plan_tool: bool,
|
||||
pub apply_patch_tool_type: Option<ApplyPatchToolType>,
|
||||
pub web_search_request: bool,
|
||||
pub include_view_image_tool: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct ToolsConfigParams<'a> {
|
||||
@@ -77,6 +78,7 @@ pub(crate) struct ToolsConfigParams<'a> {
|
||||
pub(crate) include_apply_patch_tool: bool,
|
||||
pub(crate) include_web_search_request: bool,
|
||||
pub(crate) use_streamable_shell_tool: bool,
|
||||
pub(crate) include_view_image_tool: bool,
|
||||
}
|
||||
|
||||
impl ToolsConfig {
|
||||
@@ -89,6 +91,7 @@ impl ToolsConfig {
|
||||
include_apply_patch_tool,
|
||||
include_web_search_request,
|
||||
use_streamable_shell_tool,
|
||||
include_view_image_tool,
|
||||
} = params;
|
||||
let mut shell_type = if *use_streamable_shell_tool {
|
||||
ConfigShellToolType::StreamableShell
|
||||
@@ -120,6 +123,7 @@ impl ToolsConfig {
|
||||
plan_tool: *include_plan_tool,
|
||||
apply_patch_tool_type,
|
||||
web_search_request: *include_web_search_request,
|
||||
include_view_image_tool: *include_view_image_tool,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -292,6 +296,30 @@ The shell tool is used to execute shell commands.
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn create_view_image_tool() -> OpenAiTool {
|
||||
// Support only local filesystem path.
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"path".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Local filesystem path to an image file".to_string()),
|
||||
},
|
||||
);
|
||||
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "view_image".to_string(),
|
||||
description:
|
||||
"Attach a local image (by filesystem path) to the conversation context for this turn."
|
||||
.to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["path".to_string()]),
|
||||
additional_properties: Some(false),
|
||||
},
|
||||
})
|
||||
}
|
||||
/// TODO(dylan): deprecate once we get rid of json tool
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub(crate) struct ApplyPatchToolArgs {
|
||||
@@ -541,6 +569,11 @@ pub(crate) fn get_openai_tools(
|
||||
tools.push(OpenAiTool::WebSearch {});
|
||||
}
|
||||
|
||||
// Include the view_image tool so the agent can attach images to context.
|
||||
if config.include_view_image_tool {
|
||||
tools.push(create_view_image_tool());
|
||||
}
|
||||
|
||||
if let Some(mcp_tools) = mcp_tools {
|
||||
// Ensure deterministic ordering to maximize prompt cache hits.
|
||||
// HashMap iteration order is non-deterministic, so sort by fully-qualified tool name.
|
||||
@@ -604,10 +637,14 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
let tools = get_openai_tools(&config, Some(HashMap::new()));
|
||||
|
||||
assert_eq_tool_names(&tools, &["local_shell", "update_plan", "web_search"]);
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&["local_shell", "update_plan", "web_search", "view_image"],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -621,10 +658,14 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
let tools = get_openai_tools(&config, Some(HashMap::new()));
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "update_plan", "web_search"]);
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&["shell", "update_plan", "web_search", "view_image"],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -638,6 +679,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
let tools = get_openai_tools(
|
||||
&config,
|
||||
@@ -679,11 +721,16 @@ mod tests {
|
||||
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&["shell", "web_search", "test_server/do_something_cool"],
|
||||
&[
|
||||
"shell",
|
||||
"web_search",
|
||||
"view_image",
|
||||
"test_server/do_something_cool",
|
||||
],
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
tools[2],
|
||||
tools[3],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "test_server/do_something_cool".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -737,6 +784,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: false,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
|
||||
// Intentionally construct a map with keys that would sort alphabetically.
|
||||
@@ -794,6 +842,7 @@ mod tests {
|
||||
&tools,
|
||||
&[
|
||||
"shell",
|
||||
"view_image",
|
||||
"test_server/cool",
|
||||
"test_server/do",
|
||||
"test_server/something",
|
||||
@@ -812,6 +861,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
|
||||
let tools = get_openai_tools(
|
||||
@@ -837,10 +887,13 @@ mod tests {
|
||||
)])),
|
||||
);
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/search"]);
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&["shell", "web_search", "view_image", "dash/search"],
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
tools[2],
|
||||
tools[3],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "dash/search".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -870,6 +923,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
|
||||
let tools = get_openai_tools(
|
||||
@@ -893,9 +947,12 @@ mod tests {
|
||||
)])),
|
||||
);
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/paginate"]);
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&["shell", "web_search", "view_image", "dash/paginate"],
|
||||
);
|
||||
assert_eq!(
|
||||
tools[2],
|
||||
tools[3],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "dash/paginate".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -923,6 +980,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
|
||||
let tools = get_openai_tools(
|
||||
@@ -946,9 +1004,9 @@ mod tests {
|
||||
)])),
|
||||
);
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/tags"]);
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "view_image", "dash/tags"]);
|
||||
assert_eq!(
|
||||
tools[2],
|
||||
tools[3],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "dash/tags".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -979,6 +1037,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
|
||||
let tools = get_openai_tools(
|
||||
@@ -1002,9 +1061,9 @@ mod tests {
|
||||
)])),
|
||||
);
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/value"]);
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "view_image", "dash/value"]);
|
||||
assert_eq!(
|
||||
tools[2],
|
||||
tools[3],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "dash/value".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
|
||||
Reference in New Issue
Block a user