Add "View Image" tool (#2723)
Adds a "View Image" tool so Codex can find and see images by itself: <img width="1772" height="420" alt="Screenshot 2025-08-26 at 10 40 04 AM" src="https://github.com/user-attachments/assets/7a459c7b-0b86-4125-82d9-05fbb35ade03" />
This commit is contained in:
@@ -518,6 +518,7 @@ impl Session {
|
||||
include_apply_patch_tool: config.include_apply_patch_tool,
|
||||
include_web_search_request: config.tools_web_search_request,
|
||||
use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
|
||||
include_view_image_tool: config.include_view_image_tool,
|
||||
}),
|
||||
user_instructions,
|
||||
base_instructions,
|
||||
@@ -1108,6 +1109,7 @@ async fn submission_loop(
|
||||
include_apply_patch_tool: config.include_apply_patch_tool,
|
||||
include_web_search_request: config.tools_web_search_request,
|
||||
use_streamable_shell_tool: config.use_experimental_streamable_shell_tool,
|
||||
include_view_image_tool: config.include_view_image_tool,
|
||||
});
|
||||
|
||||
let new_turn_context = TurnContext {
|
||||
@@ -1193,6 +1195,7 @@ async fn submission_loop(
|
||||
include_web_search_request: config.tools_web_search_request,
|
||||
use_streamable_shell_tool: config
|
||||
.use_experimental_streamable_shell_tool,
|
||||
include_view_image_tool: config.include_view_image_tool,
|
||||
}),
|
||||
user_instructions: turn_context.user_instructions.clone(),
|
||||
base_instructions: turn_context.base_instructions.clone(),
|
||||
@@ -2077,6 +2080,36 @@ async fn handle_function_call(
|
||||
)
|
||||
.await
|
||||
}
|
||||
"view_image" => {
|
||||
#[derive(serde::Deserialize)]
|
||||
struct SeeImageArgs {
|
||||
path: String,
|
||||
}
|
||||
let args = match serde_json::from_str::<SeeImageArgs>(&arguments) {
|
||||
Ok(a) => a,
|
||||
Err(e) => {
|
||||
return ResponseInputItem::FunctionCallOutput {
|
||||
call_id,
|
||||
output: FunctionCallOutputPayload {
|
||||
content: format!("failed to parse function arguments: {e}"),
|
||||
success: Some(false),
|
||||
},
|
||||
};
|
||||
}
|
||||
};
|
||||
let abs = turn_context.resolve_path(Some(args.path));
|
||||
let output = match sess.inject_input(vec![InputItem::LocalImage { path: abs }]) {
|
||||
Ok(()) => FunctionCallOutputPayload {
|
||||
content: "attached local image path".to_string(),
|
||||
success: Some(true),
|
||||
},
|
||||
Err(_) => FunctionCallOutputPayload {
|
||||
content: "unable to attach image (no active task)".to_string(),
|
||||
success: Some(false),
|
||||
},
|
||||
};
|
||||
ResponseInputItem::FunctionCallOutput { call_id, output }
|
||||
}
|
||||
"apply_patch" => {
|
||||
let args = match serde_json::from_str::<ApplyPatchToolArgs>(&arguments) {
|
||||
Ok(a) => a,
|
||||
|
||||
@@ -178,6 +178,9 @@ pub struct Config {
|
||||
pub preferred_auth_method: AuthMode,
|
||||
|
||||
pub use_experimental_streamable_shell_tool: bool,
|
||||
|
||||
/// Include the `view_image` tool that lets the agent attach a local image path to context.
|
||||
pub include_view_image_tool: bool,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
@@ -497,6 +500,10 @@ pub struct ToolsToml {
|
||||
// Renamed from `web_search_request`; keep alias for backwards compatibility.
|
||||
#[serde(default, alias = "web_search_request")]
|
||||
pub web_search: Option<bool>,
|
||||
|
||||
/// Enable the `view_image` tool that lets the agent attach local images.
|
||||
#[serde(default)]
|
||||
pub view_image: Option<bool>,
|
||||
}
|
||||
|
||||
impl ConfigToml {
|
||||
@@ -586,6 +593,7 @@ pub struct ConfigOverrides {
|
||||
pub base_instructions: Option<String>,
|
||||
pub include_plan_tool: Option<bool>,
|
||||
pub include_apply_patch_tool: Option<bool>,
|
||||
pub include_view_image_tool: Option<bool>,
|
||||
pub disable_response_storage: Option<bool>,
|
||||
pub show_raw_agent_reasoning: Option<bool>,
|
||||
pub tools_web_search_request: Option<bool>,
|
||||
@@ -613,6 +621,7 @@ impl Config {
|
||||
base_instructions,
|
||||
include_plan_tool,
|
||||
include_apply_patch_tool,
|
||||
include_view_image_tool,
|
||||
disable_response_storage,
|
||||
show_raw_agent_reasoning,
|
||||
tools_web_search_request: override_tools_web_search_request,
|
||||
@@ -681,6 +690,10 @@ impl Config {
|
||||
.or(cfg.tools.as_ref().and_then(|t| t.web_search))
|
||||
.unwrap_or(false);
|
||||
|
||||
let include_view_image_tool = include_view_image_tool
|
||||
.or(cfg.tools.as_ref().and_then(|t| t.view_image))
|
||||
.unwrap_or(true);
|
||||
|
||||
let model = model
|
||||
.or(config_profile.model)
|
||||
.or(cfg.model)
|
||||
@@ -784,6 +797,7 @@ impl Config {
|
||||
use_experimental_streamable_shell_tool: cfg
|
||||
.experimental_use_exec_command_tool
|
||||
.unwrap_or(false),
|
||||
include_view_image_tool,
|
||||
};
|
||||
Ok(config)
|
||||
}
|
||||
@@ -1152,6 +1166,7 @@ disable_response_storage = true
|
||||
responses_originator_header: "codex_cli_rs".to_string(),
|
||||
preferred_auth_method: AuthMode::ChatGPT,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
},
|
||||
o3_profile_config
|
||||
);
|
||||
@@ -1208,6 +1223,7 @@ disable_response_storage = true
|
||||
responses_originator_header: "codex_cli_rs".to_string(),
|
||||
preferred_auth_method: AuthMode::ChatGPT,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
};
|
||||
|
||||
assert_eq!(expected_gpt3_profile_config, gpt3_profile_config);
|
||||
@@ -1279,6 +1295,7 @@ disable_response_storage = true
|
||||
responses_originator_header: "codex_cli_rs".to_string(),
|
||||
preferred_auth_method: AuthMode::ChatGPT,
|
||||
use_experimental_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
};
|
||||
|
||||
assert_eq!(expected_zdr_profile_config, zdr_profile_config);
|
||||
|
||||
@@ -67,6 +67,7 @@ pub(crate) struct ToolsConfig {
|
||||
pub plan_tool: bool,
|
||||
pub apply_patch_tool_type: Option<ApplyPatchToolType>,
|
||||
pub web_search_request: bool,
|
||||
pub include_view_image_tool: bool,
|
||||
}
|
||||
|
||||
pub(crate) struct ToolsConfigParams<'a> {
|
||||
@@ -77,6 +78,7 @@ pub(crate) struct ToolsConfigParams<'a> {
|
||||
pub(crate) include_apply_patch_tool: bool,
|
||||
pub(crate) include_web_search_request: bool,
|
||||
pub(crate) use_streamable_shell_tool: bool,
|
||||
pub(crate) include_view_image_tool: bool,
|
||||
}
|
||||
|
||||
impl ToolsConfig {
|
||||
@@ -89,6 +91,7 @@ impl ToolsConfig {
|
||||
include_apply_patch_tool,
|
||||
include_web_search_request,
|
||||
use_streamable_shell_tool,
|
||||
include_view_image_tool,
|
||||
} = params;
|
||||
let mut shell_type = if *use_streamable_shell_tool {
|
||||
ConfigShellToolType::StreamableShell
|
||||
@@ -120,6 +123,7 @@ impl ToolsConfig {
|
||||
plan_tool: *include_plan_tool,
|
||||
apply_patch_tool_type,
|
||||
web_search_request: *include_web_search_request,
|
||||
include_view_image_tool: *include_view_image_tool,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -292,6 +296,30 @@ The shell tool is used to execute shell commands.
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
fn create_view_image_tool() -> OpenAiTool {
|
||||
// Support only local filesystem path.
|
||||
let mut properties = BTreeMap::new();
|
||||
properties.insert(
|
||||
"path".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some("Local filesystem path to an image file".to_string()),
|
||||
},
|
||||
);
|
||||
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "view_image".to_string(),
|
||||
description:
|
||||
"Attach a local image (by filesystem path) to the conversation context for this turn."
|
||||
.to_string(),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
properties,
|
||||
required: Some(vec!["path".to_string()]),
|
||||
additional_properties: Some(false),
|
||||
},
|
||||
})
|
||||
}
|
||||
/// TODO(dylan): deprecate once we get rid of json tool
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub(crate) struct ApplyPatchToolArgs {
|
||||
@@ -541,6 +569,11 @@ pub(crate) fn get_openai_tools(
|
||||
tools.push(OpenAiTool::WebSearch {});
|
||||
}
|
||||
|
||||
// Include the view_image tool so the agent can attach images to context.
|
||||
if config.include_view_image_tool {
|
||||
tools.push(create_view_image_tool());
|
||||
}
|
||||
|
||||
if let Some(mcp_tools) = mcp_tools {
|
||||
// Ensure deterministic ordering to maximize prompt cache hits.
|
||||
// HashMap iteration order is non-deterministic, so sort by fully-qualified tool name.
|
||||
@@ -604,10 +637,14 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
let tools = get_openai_tools(&config, Some(HashMap::new()));
|
||||
|
||||
assert_eq_tool_names(&tools, &["local_shell", "update_plan", "web_search"]);
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&["local_shell", "update_plan", "web_search", "view_image"],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -621,10 +658,14 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
let tools = get_openai_tools(&config, Some(HashMap::new()));
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "update_plan", "web_search"]);
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&["shell", "update_plan", "web_search", "view_image"],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -638,6 +679,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
let tools = get_openai_tools(
|
||||
&config,
|
||||
@@ -679,11 +721,16 @@ mod tests {
|
||||
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&["shell", "web_search", "test_server/do_something_cool"],
|
||||
&[
|
||||
"shell",
|
||||
"web_search",
|
||||
"view_image",
|
||||
"test_server/do_something_cool",
|
||||
],
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
tools[2],
|
||||
tools[3],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "test_server/do_something_cool".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -737,6 +784,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: false,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
|
||||
// Intentionally construct a map with keys that would sort alphabetically.
|
||||
@@ -794,6 +842,7 @@ mod tests {
|
||||
&tools,
|
||||
&[
|
||||
"shell",
|
||||
"view_image",
|
||||
"test_server/cool",
|
||||
"test_server/do",
|
||||
"test_server/something",
|
||||
@@ -812,6 +861,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
|
||||
let tools = get_openai_tools(
|
||||
@@ -837,10 +887,13 @@ mod tests {
|
||||
)])),
|
||||
);
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/search"]);
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&["shell", "web_search", "view_image", "dash/search"],
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
tools[2],
|
||||
tools[3],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "dash/search".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -870,6 +923,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
|
||||
let tools = get_openai_tools(
|
||||
@@ -893,9 +947,12 @@ mod tests {
|
||||
)])),
|
||||
);
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/paginate"]);
|
||||
assert_eq_tool_names(
|
||||
&tools,
|
||||
&["shell", "web_search", "view_image", "dash/paginate"],
|
||||
);
|
||||
assert_eq!(
|
||||
tools[2],
|
||||
tools[3],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "dash/paginate".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -923,6 +980,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
|
||||
let tools = get_openai_tools(
|
||||
@@ -946,9 +1004,9 @@ mod tests {
|
||||
)])),
|
||||
);
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/tags"]);
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "view_image", "dash/tags"]);
|
||||
assert_eq!(
|
||||
tools[2],
|
||||
tools[3],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "dash/tags".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
@@ -979,6 +1037,7 @@ mod tests {
|
||||
include_apply_patch_tool: false,
|
||||
include_web_search_request: true,
|
||||
use_streamable_shell_tool: false,
|
||||
include_view_image_tool: true,
|
||||
});
|
||||
|
||||
let tools = get_openai_tools(
|
||||
@@ -1002,9 +1061,9 @@ mod tests {
|
||||
)])),
|
||||
);
|
||||
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "dash/value"]);
|
||||
assert_eq_tool_names(&tools, &["shell", "web_search", "view_image", "dash/value"]);
|
||||
assert_eq!(
|
||||
tools[2],
|
||||
tools[3],
|
||||
OpenAiTool::Function(ResponsesApiTool {
|
||||
name: "dash/value".to_string(),
|
||||
parameters: JsonSchema::Object {
|
||||
|
||||
@@ -191,7 +191,7 @@ async fn prompt_tools_are_consistent_across_requests() {
|
||||
let expected_instructions: &str = include_str!("../../prompt.md");
|
||||
// our internal implementation is responsible for keeping tools in sync
|
||||
// with the OpenAI schema, so we just verify the tool presence here
|
||||
let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch"];
|
||||
let expected_tools_names: &[&str] = &["shell", "update_plan", "apply_patch", "view_image"];
|
||||
let body0 = requests[0].body_json::<serde_json::Value>().unwrap();
|
||||
assert_eq!(
|
||||
body0["instructions"],
|
||||
|
||||
@@ -148,6 +148,7 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
||||
base_instructions: None,
|
||||
include_plan_tool: None,
|
||||
include_apply_patch_tool: None,
|
||||
include_view_image_tool: None,
|
||||
disable_response_storage: oss.then_some(true),
|
||||
show_raw_agent_reasoning: oss.then_some(true),
|
||||
tools_web_search_request: None,
|
||||
|
||||
@@ -798,6 +798,7 @@ fn derive_config_from_params(
|
||||
base_instructions,
|
||||
include_plan_tool,
|
||||
include_apply_patch_tool,
|
||||
include_view_image_tool: None,
|
||||
disable_response_storage: None,
|
||||
show_raw_agent_reasoning: None,
|
||||
tools_web_search_request: None,
|
||||
|
||||
@@ -161,6 +161,7 @@ impl CodexToolCallParam {
|
||||
base_instructions,
|
||||
include_plan_tool,
|
||||
include_apply_patch_tool: None,
|
||||
include_view_image_tool: None,
|
||||
disable_response_storage: None,
|
||||
show_raw_agent_reasoning: None,
|
||||
tools_web_search_request: None,
|
||||
|
||||
@@ -128,6 +128,7 @@ pub async fn run_main(
|
||||
base_instructions: None,
|
||||
include_plan_tool: Some(true),
|
||||
include_apply_patch_tool: None,
|
||||
include_view_image_tool: None,
|
||||
disable_response_storage: cli.oss.then_some(true),
|
||||
show_raw_agent_reasoning: cli.oss.then_some(true),
|
||||
tools_web_search_request: cli.web_search.then_some(true),
|
||||
|
||||
Reference in New Issue
Block a user