show "Viewed Image" when the model views an image (#4475)

<img width="1022" height="339" alt="Screenshot 2025-09-29 at 4 22 00 PM"
src="https://github.com/user-attachments/assets/12da7358-19be-4010-a71b-496ede6dfbbf"
/>
This commit is contained in:
Jeremy Rose
2025-10-02 11:36:03 -07:00
committed by GitHub
parent ec98445abf
commit 45936f8fbd
10 changed files with 83 additions and 2 deletions

View File

@@ -109,6 +109,7 @@ use crate::protocol::Submission;
use crate::protocol::TokenCountEvent;
use crate::protocol::TokenUsage;
use crate::protocol::TurnDiffEvent;
use crate::protocol::ViewImageToolCallEvent;
use crate::protocol::WebSearchBeginEvent;
use crate::rollout::RolloutRecorder;
use crate::rollout::RolloutRecorderParams;
@@ -2469,13 +2470,21 @@ async fn handle_function_call(
))
})?;
let abs = turn_context.resolve_path(Some(args.path));
sess.inject_input(vec![InputItem::LocalImage { path: abs }])
sess.inject_input(vec![InputItem::LocalImage { path: abs.clone() }])
.await
.map_err(|_| {
FunctionCallError::RespondToModel(
"unable to attach image (no active task)".to_string(),
)
})?;
sess.send_event(Event {
id: sub_id.clone(),
msg: EventMsg::ViewImageToolCall(ViewImageToolCallEvent {
call_id: call_id.clone(),
path: abs,
}),
})
.await;
Ok("attached local image path".to_string())
}

View File

@@ -70,6 +70,7 @@ pub(crate) fn should_persist_event_msg(ev: &EventMsg) -> bool {
| EventMsg::ListCustomPromptsResponse(_)
| EventMsg::PlanUpdate(_)
| EventMsg::ShutdownComplete
| EventMsg::ViewImageToolCall(_)
| EventMsg::ConversationPath(_) => false,
}
}

View File

@@ -580,6 +580,14 @@ impl EventProcessor for EventProcessorWithHumanOutput {
EventMsg::ListCustomPromptsResponse(_) => {
// Currently ignored in exec output.
}
EventMsg::ViewImageToolCall(view) => {
ts_println!(
self,
"{} {}",
"viewed image".style(self.magenta),
view.path.display()
);
}
EventMsg::TurnAborted(abort_reason) => match abort_reason.reason {
TurnAbortReason::Interrupted => {
ts_println!(self, "task interrupted");

View File

@@ -280,6 +280,7 @@ async fn run_codex_tool_session_inner(
| EventMsg::ConversationPath(_)
| EventMsg::UserMessage(_)
| EventMsg::ShutdownComplete
| EventMsg::ViewImageToolCall(_)
| EventMsg::EnteredReviewMode(_)
| EventMsg::ExitedReviewMode(_) => {
// For now, we do not do anything extra for these

View File

@@ -477,6 +477,9 @@ pub enum EventMsg {
ExecCommandEnd(ExecCommandEndEvent),
/// Notification that the agent attached a local image via the view_image tool.
ViewImageToolCall(ViewImageToolCallEvent),
ExecApprovalRequest(ExecApprovalRequestEvent),
ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent),
@@ -1074,6 +1077,14 @@ pub struct ExecCommandEndEvent {
pub formatted_output: String,
}
/// Payload of `EventMsg::ViewImageToolCall`: notifies clients that the agent attached a
/// local image via the view_image tool.
#[derive(Debug, Clone, Deserialize, Serialize, TS)]
pub struct ViewImageToolCallEvent {
/// Identifier for the originating tool call.
pub call_id: String,
/// Local filesystem path of the image that was attached.
///
/// NOTE(review): the emitter in `handle_function_call` sends the path returned by
/// `turn_context.resolve_path`, not the raw tool argument — confirm whether consumers
/// may rely on it already being resolved.
pub path: PathBuf,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, TS)]
#[serde(rename_all = "snake_case")]
pub enum ExecOutputStream {

View File

@@ -39,6 +39,7 @@ use codex_core::protocol::TokenUsageInfo;
use codex_core::protocol::TurnAbortReason;
use codex_core::protocol::TurnDiffEvent;
use codex_core::protocol::UserMessageEvent;
use codex_core::protocol::ViewImageToolCallEvent;
use codex_core::protocol::WebSearchBeginEvent;
use codex_core::protocol::WebSearchEndEvent;
use codex_protocol::ConversationId;
@@ -538,6 +539,15 @@ impl ChatWidget {
));
}
fn on_view_image_tool_call(&mut self, event: ViewImageToolCallEvent) {
self.flush_answer_stream_with_separator();
self.add_to_history(history_cell::new_view_image_tool_call(
event.path,
&self.config.cwd,
));
self.request_redraw();
}
fn on_patch_apply_end(&mut self, event: codex_core::protocol::PatchApplyEndEvent) {
let ev2 = event.clone();
self.defer_or_handle(
@@ -1398,6 +1408,7 @@ impl ChatWidget {
EventMsg::PatchApplyBegin(ev) => self.on_patch_apply_begin(ev),
EventMsg::PatchApplyEnd(ev) => self.on_patch_apply_end(ev),
EventMsg::ExecCommandEnd(ev) => self.on_exec_command_end(ev),
EventMsg::ViewImageToolCall(ev) => self.on_view_image_tool_call(ev),
EventMsg::McpToolCallBegin(ev) => self.on_mcp_tool_call_begin(ev),
EventMsg::McpToolCallEnd(ev) => self.on_mcp_tool_call_end(ev),
EventMsg::WebSearchBegin(ev) => self.on_web_search_begin(ev),

View File

@@ -0,0 +1,6 @@
---
source: tui/src/chatwidget/tests.rs
expression: combined
---
• Viewed Image
└ example.png

View File

@@ -35,6 +35,7 @@ use codex_core::protocol::ReviewRequest;
use codex_core::protocol::StreamErrorEvent;
use codex_core::protocol::TaskCompleteEvent;
use codex_core::protocol::TaskStartedEvent;
use codex_core::protocol::ViewImageToolCallEvent;
use codex_protocol::ConversationId;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
@@ -794,6 +795,25 @@ fn custom_prompt_enter_empty_does_not_send() {
assert!(rx.try_recv().is_err(), "no app event should be sent");
}
/// A `ViewImageToolCall` event must produce exactly one history cell whose rendered text
/// matches the stored snapshot.
#[test]
fn view_image_tool_call_adds_history_cell() {
    let (mut chat, mut rx, _op_rx) = make_chatwidget_manual();

    // Place the image under the widget's cwd so the displayed path is just the file name.
    let event = Event {
        id: "sub-image".into(),
        msg: EventMsg::ViewImageToolCall(ViewImageToolCallEvent {
            call_id: "call-image".into(),
            path: chat.config.cwd.join("example.png"),
        }),
    };
    chat.handle_codex_event(event);

    let history = drain_insert_history(&mut rx);
    assert_eq!(history.len(), 1, "expected a single history cell");

    // Keep the binding named `combined`: the snapshot metadata records this expression.
    let combined = lines_to_single_string(&history[0]);
    assert_snapshot!("local_image_attachment_history_snapshot", combined);
}
// Snapshot test: interrupting a running exec finalizes the active cell with a red ✗
// marker (replacing the spinner) and flushes it into history.
#[test]

View File

@@ -268,7 +268,9 @@ pub(crate) fn display_path_for(path: &Path, cwd: &Path) -> String {
let chosen = if path_in_same_repo {
pathdiff::diff_paths(path, cwd).unwrap_or_else(|| path.to_path_buf())
} else {
relativize_to_home(path).unwrap_or_else(|| path.to_path_buf())
relativize_to_home(path)
.map(|p| PathBuf::from_iter([Path::new("~"), p.as_path()]))
.unwrap_or_else(|| path.to_path_buf())
};
chosen.display().to_string()
}

View File

@@ -1,4 +1,5 @@
use crate::diff_render::create_diff_summary;
use crate::diff_render::display_path_for;
use crate::exec_cell::CommandOutput;
use crate::exec_cell::OutputLinesParams;
use crate::exec_cell::TOOL_CALL_MAX_LINES;
@@ -1037,6 +1038,17 @@ pub(crate) fn new_patch_apply_failure(stderr: String) -> PlainHistoryCell {
PlainHistoryCell { lines }
}
/// Builds the history cell shown when the model views a local image.
///
/// The cell has two lines:
/// - a dim bullet followed by a bold "Viewed Image" header, and
/// - a dim tree-branch marker followed by the image path, shortened for display by
///   `display_path_for` using `cwd`.
///
/// `path` is the image location carried by the tool-call event; `cwd` is the working
/// directory used to shorten it.
pub(crate) fn new_view_image_tool_call(path: PathBuf, cwd: &Path) -> PlainHistoryCell {
    let display_path = display_path_for(&path, cwd);

    // The prefix spans must not be empty: the stored snapshot for this cell expects a
    // "• " bullet on the header line and a "└ " branch marker on the path line.
    // NOTE(review): the two-space indent before "└" is assumed; confirm against the
    // stored snapshot file.
    let lines: Vec<Line<'static>> = vec![
        vec!["• ".dim(), "Viewed Image".bold()].into(),
        vec!["  └ ".dim(), display_path.dim()].into(),
    ];

    PlainHistoryCell { lines }
}
pub(crate) fn new_reasoning_block(
full_reasoning_buffer: String,
config: &Config,