From c5d21a4564efc03ec47b14c3396b1cad3e6854ed Mon Sep 17 00:00:00 2001 From: pap-openai Date: Fri, 22 Aug 2025 18:05:43 +0100 Subject: [PATCH] ctrl+v image + @file accepts images (#1695) Allow Ctrl+V in the TUI to attach an image from the clipboard. In addition, @file selections that are images are now appended as raw image files (and read by the model) rather than pasted as a path that the model cannot read. This reuses the components and the same interface we're using for copying pasted content in https://github.com/openai/codex/commit/72504f1d9c6eb17086d86ef1fb0d17676812461b. @aibrahim-oai as you implemented that, would you mind having a look at this one? https://github.com/user-attachments/assets/c6c1153b-6b32-4558-b9a2-f8c57d2be710 --------- Co-authored-by: easong-openai Co-authored-by: Daniel Edrisian Co-authored-by: Michael Bolin --- codex-rs/Cargo.lock | 164 ++++++++ codex-rs/tui/Cargo.toml | 7 +- codex-rs/tui/src/app.rs | 9 + codex-rs/tui/src/bottom_pane/chat_composer.rs | 385 +++++++++++++++++- codex-rs/tui/src/bottom_pane/mod.rs | 19 + codex-rs/tui/src/chatwidget.rs | 21 +- codex-rs/tui/src/clipboard_paste.rs | 97 +++++ codex-rs/tui/src/lib.rs | 1 + codex-rs/tui/src/tui.rs | 39 +- 9 files changed, 728 insertions(+), 14 deletions(-) create mode 100644 codex-rs/tui/src/clipboard_paste.rs diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 34e79320..cb4cd68d 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -186,6 +186,26 @@ version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223" +[[package]] +name = "arboard" +version = "3.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55f533f8e0af236ffe5eb979b99381df3258853f00ba2e44b6e1955292c75227" +dependencies = [ + "clipboard-win", + "image", + "log", + "objc2", + "objc2-app-kit", + "objc2-core-foundation", + "objc2-core-graphics", + "objc2-foundation", + "parking_lot", + "percent-encoding", + "windows-sys 0.59.0", + "x11rb", +] + 
[[package]] name = "arg_enum_proc_macro" version = "0.3.4" @@ -928,6 +948,7 @@ name = "codex-tui" version = "0.0.0" dependencies = [ "anyhow", + "arboard", "async-stream", "base64 0.22.1", "chrono", @@ -962,6 +983,7 @@ dependencies = [ "strum 0.27.2", "strum_macros 0.27.2", "supports-color", + "tempfile", "textwrap 0.16.2", "tokio", "tokio-stream", @@ -1410,6 +1432,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "dispatch2" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89a09f22a6c6069a18470eb92d2298acf25463f14256d24778e1230d789a2aec" +dependencies = [ + "bitflags 2.9.1", + "objc2", +] + [[package]] name = "display_container" version = "0.9.0" @@ -1863,6 +1895,16 @@ dependencies = [ "version_check", ] +[[package]] +name = "gethostname" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0176e0459c2e4a1fe232f984bca6890e681076abb9934f6cea7c326f3fc47818" +dependencies = [ + "libc", + "windows-targets 0.48.5", +] + [[package]] name = "getopts" version = "0.2.23" @@ -3059,6 +3101,42 @@ dependencies = [ "objc2-encode", ] +[[package]] +name = "objc2-app-kit" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6f29f568bec459b0ddff777cec4fe3fd8666d82d5a40ebd0ff7e66134f89bcc" +dependencies = [ + "bitflags 2.9.1", + "objc2", + "objc2-core-graphics", + "objc2-foundation", +] + +[[package]] +name = "objc2-core-foundation" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166" +dependencies = [ + "bitflags 2.9.1", + "dispatch2", + "objc2", +] + +[[package]] +name = "objc2-core-graphics" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "989c6c68c13021b5c2d6b71456ebb0f9dc78d752e86a98da7c716f4f9470f5a4" +dependencies = [ + "bitflags 2.9.1", + "dispatch2", + "objc2", + 
"objc2-core-foundation", + "objc2-io-surface", +] + [[package]] name = "objc2-encode" version = "4.1.0" @@ -3073,6 +3151,18 @@ checksum = "900831247d2fe1a09a683278e5384cfb8c80c79fe6b166f9d14bfdde0ea1b03c" dependencies = [ "bitflags 2.9.1", "objc2", + "objc2-core-foundation", +] + +[[package]] +name = "objc2-io-surface" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7282e9ac92529fa3457ce90ebb15f4ecbc383e8338060960760fa2cf75420c3c" +dependencies = [ + "bitflags 2.9.1", + "objc2", + "objc2-core-foundation", ] [[package]] @@ -5846,6 +5936,21 @@ dependencies = [ "windows_x86_64_msvc 0.42.2", ] +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -5884,6 +5989,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" @@ -5902,6 +6013,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" @@ -5920,6 +6037,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -5950,6 +6073,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" @@ -5968,6 +6097,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" @@ -5986,6 +6121,12 @@ version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" @@ -6004,6 +6145,12 @@ version = "0.42.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" @@ -6070,6 +6217,23 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +[[package]] +name = "x11rb" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d91ffca73ee7f68ce055750bf9f6eca0780b8c85eff9bc046a3b0da41755e12" +dependencies = [ + "gethostname", + "rustix 0.38.44", + "x11rb-protocol", +] + +[[package]] +name = "x11rb-protocol" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec107c4503ea0b4a98ef47356329af139c0a4f7750e621cf2973cd3385ebcb3d" + [[package]] name = "yaml-rust" version = "0.4.5" diff --git a/codex-rs/tui/Cargo.toml b/codex-rs/tui/Cargo.toml index 20ceb0b7..6d69e97e 100644 --- a/codex-rs/tui/Cargo.toml +++ b/codex-rs/tui/Cargo.toml @@ -22,6 +22,7 @@ workspace = true [dependencies] anyhow = "1" +arboard = "3" async-stream = "0.3.6" base64 = "0.22.1" chrono = { version = "0.4", features = ["serde"] } @@ -41,7 +42,10 @@ codex-protocol = { path = "../protocol" } color-eyre = "0.6.3" crossterm = { version = "0.28.1", features = ["bracketed-paste", "event-stream"] } diffy = "0.4.2" -image = { version = "^0.25.6", default-features = false, features = ["jpeg"] } +image = { version = "^0.25.6", default-features = false, features = [ + "jpeg", + "png", +] } lazy_static = "1" mcp-types = { path = "../mcp-types" } once_cell = "1" @@ -61,6 +65,7 @@ shlex = "1.3.0" strum = "0.27.2" strum_macros = "0.27.2" supports-color = "3.0.2" +tempfile = "3" textwrap = 
"0.16.2" tokio = { version = "1", features = [ "io-std", diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index 057d77ff..a532ba71 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -145,6 +145,15 @@ impl App { }, )?; } + TuiEvent::AttachImage { + path, + width, + height, + format_label, + } => { + self.chat_widget + .attach_image(path, width, height, format_label); + } } } Ok(true) diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs index 0cc34542..909ecbc0 100644 --- a/codex-rs/tui/src/bottom_pane/chat_composer.rs +++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs @@ -31,6 +31,9 @@ use crate::bottom_pane::textarea::TextArea; use crate::bottom_pane::textarea::TextAreaState; use codex_file_search::FileMatch; use std::cell::RefCell; +use std::collections::HashMap; +use std::path::Path; +use std::path::PathBuf; /// If the pasted content exceeds this number of characters, replace it with a /// placeholder in the UI. @@ -43,6 +46,12 @@ pub enum InputResult { None, } +#[derive(Clone, Debug, PartialEq)] +struct AttachedImage { + placeholder: String, + path: PathBuf, +} + struct TokenUsageInfo { total_token_usage: TokenUsage, last_token_usage: TokenUsage, @@ -71,6 +80,7 @@ pub(crate) struct ChatComposer { pending_pastes: Vec<(String, String)>, token_usage_info: Option, has_focus: bool, + attached_images: Vec, placeholder_text: String, } @@ -103,6 +113,7 @@ impl ChatComposer { pending_pastes: Vec::new(), token_usage_info: None, has_focus: has_input_focus, + attached_images: Vec::new(), placeholder_text, } } @@ -196,6 +207,20 @@ impl ChatComposer { true } + pub fn attach_image(&mut self, path: PathBuf, width: u32, height: u32, format_label: &str) { + let placeholder = format!("[image {width}x{height} {format_label}]"); + // Insert as an element to match large paste placeholder behavior: + // styled distinctly and treated atomically for cursor/mutations. 
+ self.textarea.insert_element(&placeholder); + self.attached_images + .push(AttachedImage { placeholder, path }); + } + + pub fn take_recent_submission_images(&mut self) -> Vec { + let images = std::mem::take(&mut self.attached_images); + images.into_iter().map(|img| img.path).collect() + } + /// Integrate results from an asynchronous file search. pub(crate) fn on_file_search_result(&mut self, query: String, matches: Vec) { // Only apply if user is still editing a token starting with `query`. @@ -346,19 +371,74 @@ impl ChatComposer { modifiers: KeyModifiers::NONE, .. } => { - if let Some(sel) = popup.selected_match() { - let sel_path = sel.to_string(); - // Drop popup borrow before using self mutably again. - self.insert_selected_path(&sel_path); + let Some(sel) = popup.selected_match() else { self.active_popup = ActivePopup::None; return (InputResult::None, true); + }; + + let sel_path = sel.to_string(); + // If selected path looks like an image (png/jpeg), attach as image instead of inserting text. + let is_image = Self::is_image_path(&sel_path); + if is_image { + // Determine dimensions; if that fails fall back to normal path insertion. + let path_buf = PathBuf::from(&sel_path); + if let Ok((w, h)) = image::image_dimensions(&path_buf) { + // Remove the current @token (mirror logic from insert_selected_path without inserting text) + // using the flat text and byte-offset cursor API. + let cursor_offset = self.textarea.cursor(); + let text = self.textarea.text(); + let before_cursor = &text[..cursor_offset]; + let after_cursor = &text[cursor_offset..]; + + // Determine token boundaries in the full text. 
+ let start_idx = before_cursor + .char_indices() + .rfind(|(_, c)| c.is_whitespace()) + .map(|(idx, c)| idx + c.len_utf8()) + .unwrap_or(0); + let end_rel_idx = after_cursor + .char_indices() + .find(|(_, c)| c.is_whitespace()) + .map(|(idx, _)| idx) + .unwrap_or(after_cursor.len()); + let end_idx = cursor_offset + end_rel_idx; + + self.textarea.replace_range(start_idx..end_idx, ""); + self.textarea.set_cursor(start_idx); + + let format_label = match Path::new(&sel_path) + .extension() + .and_then(|e| e.to_str()) + .map(|s| s.to_ascii_lowercase()) + { + Some(ext) if ext == "png" => "PNG", + Some(ext) if ext == "jpg" || ext == "jpeg" => "JPEG", + _ => "IMG", + }; + self.attach_image(path_buf.clone(), w, h, format_label); + // Add a trailing space to keep typing fluid. + self.textarea.insert_str(" "); + } else { + // Fallback to plain path insertion if metadata read fails. + self.insert_selected_path(&sel_path); + } + } else { + // Non-image: inserting file path. + self.insert_selected_path(&sel_path); } - (InputResult::None, false) + // No selection: treat Enter as closing the popup/session. + self.active_popup = ActivePopup::None; + (InputResult::None, true) } input => self.handle_input_basic(input), } } + fn is_image_path(path: &str) -> bool { + let lower = path.to_ascii_lowercase(); + lower.ends_with(".png") || lower.ends_with(".jpg") || lower.ends_with(".jpeg") + } + /// Extract the `@token` that the cursor is currently positioned on, if any. /// /// The returned string **does not** include the leading `@`. 
@@ -545,12 +625,19 @@ impl ChatComposer { } self.pending_pastes.clear(); - if text.is_empty() { - (InputResult::None, true) - } else { - self.history.record_local_submission(&text); - (InputResult::Submitted(text), true) + // Strip image placeholders from the submitted text; images are retrieved via take_recent_submission_images() + for img in &self.attached_images { + if text.contains(&img.placeholder) { + text = text.replace(&img.placeholder, ""); + } } + + text = text.trim().to_string(); + if !text.is_empty() { + self.history.record_local_submission(&text); + } + // Do not clear attached_images here; ChatWidget drains them via take_recent_submission_images(). + (InputResult::Submitted(text), true) } input => self.handle_input_basic(input), } @@ -558,6 +645,16 @@ impl ChatComposer { /// Handle generic Input events that modify the textarea content. fn handle_input_basic(&mut self, input: KeyEvent) -> (InputResult, bool) { + // Special handling for backspace on placeholders + if let KeyEvent { + code: KeyCode::Backspace, + .. + } = input + && self.try_remove_any_placeholder_at_cursor() + { + return (InputResult::None, true); + } + // Normal input handling self.textarea.input(input); let text_after = self.textarea.text(); @@ -566,9 +663,165 @@ impl ChatComposer { self.pending_pastes .retain(|(placeholder, _)| text_after.contains(placeholder)); + // Keep attached images in proportion to how many matching placeholders exist in the text. + // This handles duplicate placeholders that share the same visible label. + if !self.attached_images.is_empty() { + let mut needed: HashMap = HashMap::new(); + for img in &self.attached_images { + needed + .entry(img.placeholder.clone()) + .or_insert_with(|| text_after.matches(&img.placeholder).count()); + } + + let mut used: HashMap = HashMap::new(); + let mut kept: Vec = Vec::with_capacity(self.attached_images.len()); + for img in self.attached_images.drain(..) 
{ + let total_needed = *needed.get(&img.placeholder).unwrap_or(&0); + let used_count = used.entry(img.placeholder.clone()).or_insert(0); + if *used_count < total_needed { + kept.push(img); + *used_count += 1; + } + } + self.attached_images = kept; + } + (InputResult::None, true) } + /// Attempts to remove an image or paste placeholder if the cursor is at the end of one. + /// Returns true if a placeholder was removed. + fn try_remove_any_placeholder_at_cursor(&mut self) -> bool { + let p = self.textarea.cursor(); + let text = self.textarea.text(); + + // Try image placeholders first + let mut out: Option<(usize, String)> = None; + // Detect if the cursor is at the end of any image placeholder. + // If duplicates exist, remove the specific occurrence's mapping. + for (i, img) in self.attached_images.iter().enumerate() { + let ph = &img.placeholder; + if p < ph.len() { + continue; + } + let start = p - ph.len(); + if text[start..p] != *ph { + continue; + } + + // Count the number of occurrences of `ph` before `start`. + let mut occ_before = 0usize; + let mut search_pos = 0usize; + while search_pos < start { + if let Some(found) = text[search_pos..start].find(ph) { + occ_before += 1; + search_pos += found + ph.len(); + } else { + break; + } + } + + // Remove the occ_before-th attached image that shares this placeholder label. + out = if let Some((remove_idx, _)) = self + .attached_images + .iter() + .enumerate() + .filter(|(_, img2)| img2.placeholder == *ph) + .nth(occ_before) + { + Some((remove_idx, ph.clone())) + } else { + Some((i, ph.clone())) + }; + break; + } + if let Some((idx, placeholder)) = out { + self.textarea.replace_range(p - placeholder.len()..p, ""); + self.attached_images.remove(idx); + return true; + } + + // Also handle when the cursor is at the START of an image placeholder. 
+ // let result = 'out: { + let out: Option<(usize, String)> = 'out: { + for (i, img) in self.attached_images.iter().enumerate() { + let ph = &img.placeholder; + if p + ph.len() > text.len() { + continue; + } + if &text[p..p + ph.len()] != ph { + continue; + } + + // Count occurrences of `ph` before `p`. + let mut occ_before = 0usize; + let mut search_pos = 0usize; + while search_pos < p { + if let Some(found) = text[search_pos..p].find(ph) { + occ_before += 1; + search_pos += found + ph.len(); + } else { + break 'out None; + } + } + + if let Some((remove_idx, _)) = self + .attached_images + .iter() + .enumerate() + .filter(|(_, img2)| img2.placeholder == *ph) + .nth(occ_before) + { + break 'out Some((remove_idx, ph.clone())); + } else { + break 'out Some((i, ph.clone())); + } + } + None + }; + + if let Some((idx, placeholder)) = out { + self.textarea.replace_range(p..p + placeholder.len(), ""); + self.attached_images.remove(idx); + return true; + } + + // Then try pasted-content placeholders + if let Some(placeholder) = self.pending_pastes.iter().find_map(|(ph, _)| { + if p < ph.len() { + return None; + } + let start = p - ph.len(); + if text[start..p] == *ph { + Some(ph.clone()) + } else { + None + } + }) { + self.textarea.replace_range(p - placeholder.len()..p, ""); + self.pending_pastes.retain(|(ph, _)| ph != &placeholder); + return true; + } + + // Also handle when the cursor is at the START of a pasted-content placeholder. + if let Some(placeholder) = self.pending_pastes.iter().find_map(|(ph, _)| { + if p + ph.len() > text.len() { + return None; + } + if &text[p..p + ph.len()] == ph { + Some(ph.clone()) + } else { + None + } + }) { + self.textarea.replace_range(p..p + placeholder.len(), ""); + self.pending_pastes.retain(|(ph, _)| ph != &placeholder); + return true; + } + + false + } + /// Synchronize `self.command_popup` with the current text in the /// textarea. 
This must be called after every modification that can change /// the text so the popup is shown/updated/hidden as appropriate. @@ -746,10 +999,14 @@ impl WidgetRef for &ChatComposer { #[cfg(test)] mod tests { + use super::*; + use std::path::PathBuf; + use crate::app_event::AppEvent; use crate::bottom_pane::AppEventSender; use crate::bottom_pane::ChatComposer; use crate::bottom_pane::InputResult; + use crate::bottom_pane::chat_composer::AttachedImage; use crate::bottom_pane::chat_composer::LARGE_PASTE_CHAR_THRESHOLD; use crate::bottom_pane::textarea::TextArea; use tokio::sync::mpsc::unbounded_channel; @@ -1312,4 +1569,112 @@ mod tests { ] ); } + + // --- Image attachment tests --- + #[test] + fn attach_image_and_submit_includes_image_paths() { + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = + ChatComposer::new(true, sender, false, "Ask Codex to do anything".to_string()); + let path = PathBuf::from("/tmp/image1.png"); + composer.attach_image(path.clone(), 32, 16, "PNG"); + composer.handle_paste(" hi".into()); + let (result, _) = + composer.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); + match result { + InputResult::Submitted(text) => assert_eq!(text, "hi"), + _ => panic!("expected Submitted"), + } + let imgs = composer.take_recent_submission_images(); + assert_eq!(vec![path], imgs); + } + + #[test] + fn attach_image_without_text_submits_empty_text_and_images() { + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = + ChatComposer::new(true, sender, false, "Ask Codex to do anything".to_string()); + let path = PathBuf::from("/tmp/image2.png"); + composer.attach_image(path.clone(), 10, 5, "PNG"); + let (result, _) = + composer.handle_key_event(KeyEvent::new(KeyCode::Enter, KeyModifiers::NONE)); + match result { + InputResult::Submitted(text) => assert!(text.is_empty()), + _ => panic!("expected Submitted"), + } + let imgs = 
composer.take_recent_submission_images(); + assert_eq!(imgs.len(), 1); + assert_eq!(imgs[0], path); + assert!(composer.attached_images.is_empty()); + } + + #[test] + fn image_placeholder_backspace_behaves_like_text_placeholder() { + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = + ChatComposer::new(true, sender, false, "Ask Codex to do anything".to_string()); + let path = PathBuf::from("/tmp/image3.png"); + composer.attach_image(path.clone(), 20, 10, "PNG"); + let placeholder = composer.attached_images[0].placeholder.clone(); + + // Case 1: backspace at end + composer.textarea.move_cursor_to_end_of_line(false); + composer.handle_key_event(KeyEvent::new(KeyCode::Backspace, KeyModifiers::NONE)); + assert!(!composer.textarea.text().contains(&placeholder)); + assert!(composer.attached_images.is_empty()); + + // Re-add and test backspace in middle: should break the placeholder string + // and drop the image mapping (same as text placeholder behavior). 
+ composer.attach_image(path.clone(), 20, 10, "PNG"); + let placeholder2 = composer.attached_images[0].placeholder.clone(); + // Move cursor to roughly middle of placeholder + if let Some(start_pos) = composer.textarea.text().find(&placeholder2) { + let mid_pos = start_pos + (placeholder2.len() / 2); + composer.textarea.set_cursor(mid_pos); + composer.handle_key_event(KeyEvent::new(KeyCode::Backspace, KeyModifiers::NONE)); + assert!(!composer.textarea.text().contains(&placeholder2)); + assert!(composer.attached_images.is_empty()); + } else { + panic!("Placeholder not found in textarea"); + } + } + + #[test] + fn deleting_one_of_duplicate_image_placeholders_removes_matching_entry() { + let (tx, _rx) = unbounded_channel::(); + let sender = AppEventSender::new(tx); + let mut composer = + ChatComposer::new(true, sender, false, "Ask Codex to do anything".to_string()); + + let path1 = PathBuf::from("/tmp/image_dup1.png"); + let path2 = PathBuf::from("/tmp/image_dup2.png"); + + composer.attach_image(path1.clone(), 10, 5, "PNG"); + // separate placeholders with a space for clarity + composer.handle_paste(" ".into()); + composer.attach_image(path2.clone(), 10, 5, "PNG"); + + let ph = composer.attached_images[0].placeholder.clone(); + let text = composer.textarea.text().to_string(); + let start1 = text.find(&ph).expect("first placeholder present"); + let end1 = start1 + ph.len(); + composer.textarea.set_cursor(end1); + + // Backspace should delete the first placeholder and its mapping. 
+ composer.handle_key_event(KeyEvent::new(KeyCode::Backspace, KeyModifiers::NONE)); + + let new_text = composer.textarea.text().to_string(); + assert_eq!(1, new_text.matches(&ph).count(), "one placeholder remains"); + assert_eq!( + vec![AttachedImage { + path: path2, + placeholder: "[image 10x5 PNG]".to_string() + }], + composer.attached_images, + "one image mapping remains" + ); + } } diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs index 48ad3f02..04f5d4b9 100644 --- a/codex-rs/tui/src/bottom_pane/mod.rs +++ b/codex-rs/tui/src/bottom_pane/mod.rs @@ -1,4 +1,5 @@ //! Bottom pane: shows the ChatComposer or a BottomPaneView, if one is active. +use std::path::PathBuf; use crate::app_event_sender::AppEventSender; use crate::tui::FrameRequester; @@ -342,6 +343,24 @@ impl BottomPane { self.composer.on_file_search_result(query, matches); self.request_redraw(); } + + pub(crate) fn attach_image( + &mut self, + path: PathBuf, + width: u32, + height: u32, + format_label: &str, + ) { + if self.active_view.is_none() { + self.composer + .attach_image(path, width, height, format_label); + self.request_redraw(); + } + } + + pub(crate) fn take_recent_submission_images(&mut self) -> Vec { + self.composer.take_recent_submission_images() + } } impl WidgetRef for &BottomPane { diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 6e6f8de8..0e4bd856 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -583,7 +583,11 @@ impl ChatWidget { match self.bottom_pane.handle_key_event(key_event) { InputResult::Submitted(text) => { - self.submit_user_message(text.into()); + let images = self.bottom_pane.take_recent_submission_images(); + self.submit_user_message(UserMessage { + text, + image_paths: images, + }); } InputResult::Command(cmd) => { self.dispatch_command(cmd); @@ -592,6 +596,21 @@ impl ChatWidget { } } + pub(crate) fn attach_image( + &mut self, + path: PathBuf, + width: u32, + height: 
u32, + format_label: &str, + ) { + tracing::info!( + "attach_image path={path:?} width={width} height={height} format={format_label}", + ); + self.bottom_pane + .attach_image(path.clone(), width, height, format_label); + self.request_redraw(); + } + fn dispatch_command(&mut self, cmd: SlashCommand) { match cmd { SlashCommand::New => { diff --git a/codex-rs/tui/src/clipboard_paste.rs b/codex-rs/tui/src/clipboard_paste.rs new file mode 100644 index 00000000..3888ac34 --- /dev/null +++ b/codex-rs/tui/src/clipboard_paste.rs @@ -0,0 +1,97 @@ +use std::path::PathBuf; +use tempfile::Builder; + +#[derive(Debug)] +pub enum PasteImageError { + ClipboardUnavailable(String), + NoImage(String), + EncodeFailed(String), + IoError(String), +} + +impl std::fmt::Display for PasteImageError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PasteImageError::ClipboardUnavailable(msg) => write!(f, "clipboard unavailable: {msg}"), + PasteImageError::NoImage(msg) => write!(f, "no image on clipboard: {msg}"), + PasteImageError::EncodeFailed(msg) => write!(f, "could not encode image: {msg}"), + PasteImageError::IoError(msg) => write!(f, "io error: {msg}"), + } + } +} +impl std::error::Error for PasteImageError {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum EncodedImageFormat { + Png, +} + +impl EncodedImageFormat { + pub fn label(self) -> &'static str { + match self { + EncodedImageFormat::Png => "PNG", + } + } +} + +#[derive(Debug, Clone)] +pub struct PastedImageInfo { + pub width: u32, + pub height: u32, + pub encoded_format: EncodedImageFormat, // Always PNG for now. +} + +/// Capture image from system clipboard, encode to PNG, and return bytes + info. 
+pub fn paste_image_as_png() -> Result<(Vec, PastedImageInfo), PasteImageError> { + tracing::debug!("attempting clipboard image read"); + let mut cb = arboard::Clipboard::new() + .map_err(|e| PasteImageError::ClipboardUnavailable(e.to_string()))?; + let img = cb + .get_image() + .map_err(|e| PasteImageError::NoImage(e.to_string()))?; + let w = img.width as u32; + let h = img.height as u32; + + let mut png: Vec = Vec::new(); + let Some(rgba_img) = image::RgbaImage::from_raw(w, h, img.bytes.into_owned()) else { + return Err(PasteImageError::EncodeFailed("invalid RGBA buffer".into())); + }; + let dyn_img = image::DynamicImage::ImageRgba8(rgba_img); + tracing::debug!("clipboard image decoded RGBA {w}x{h}"); + { + let mut cursor = std::io::Cursor::new(&mut png); + dyn_img + .write_to(&mut cursor, image::ImageFormat::Png) + .map_err(|e| PasteImageError::EncodeFailed(e.to_string()))?; + } + + tracing::debug!( + "clipboard image encoded to PNG ({len} bytes)", + len = png.len() + ); + Ok(( + png, + PastedImageInfo { + width: w, + height: h, + encoded_format: EncodedImageFormat::Png, + }, + )) +} + +/// Convenience: write to a temp file and return its path + info. +pub fn paste_image_to_temp_png() -> Result<(PathBuf, PastedImageInfo), PasteImageError> { + let (png, info) = paste_image_as_png()?; + // Create a unique temporary file with a .png suffix to avoid collisions. + let tmp = Builder::new() + .prefix("codex-clipboard-") + .suffix(".png") + .tempfile() + .map_err(|e| PasteImageError::IoError(e.to_string()))?; + std::fs::write(tmp.path(), &png).map_err(|e| PasteImageError::IoError(e.to_string()))?; + // Persist the file (so it remains after the handle is dropped) and return its PathBuf. 
+ let (_file, path) = tmp + .keep() + .map_err(|e| PasteImageError::IoError(e.error.to_string()))?; + Ok((path, info)) +} diff --git a/codex-rs/tui/src/lib.rs b/codex-rs/tui/src/lib.rs index 454a7f3e..bce0d899 100644 --- a/codex-rs/tui/src/lib.rs +++ b/codex-rs/tui/src/lib.rs @@ -30,6 +30,7 @@ mod bottom_pane; mod chatwidget; mod citation_regex; mod cli; +mod clipboard_paste; mod common; pub mod custom_terminal; mod diff_render; diff --git a/codex-rs/tui/src/tui.rs b/codex-rs/tui/src/tui.rs index 3f4df39b..4498f46e 100644 --- a/codex-rs/tui/src/tui.rs +++ b/codex-rs/tui/src/tui.rs @@ -1,6 +1,7 @@ use std::io::Result; use std::io::Stdout; use std::io::stdout; +use std::path::PathBuf; use std::pin::Pin; use std::sync::Arc; use std::sync::atomic::AtomicBool; @@ -15,7 +16,11 @@ use crossterm::cursor; use crossterm::cursor::MoveTo; use crossterm::event::DisableBracketedPaste; use crossterm::event::EnableBracketedPaste; +use crossterm::event::Event; +use crossterm::event::KeyCode; use crossterm::event::KeyEvent; +use crossterm::event::KeyEventKind; +use crossterm::event::KeyModifiers; use crossterm::event::KeyboardEnhancementFlags; use crossterm::event::PopKeyboardEnhancementFlags; use crossterm::event::PushKeyboardEnhancementFlags; @@ -30,6 +35,7 @@ use ratatui::crossterm::terminal::enable_raw_mode; use ratatui::layout::Offset; use ratatui::text::Line; +use crate::clipboard_paste::paste_image_to_temp_png; use crate::custom_terminal; use crate::custom_terminal::Terminal as CustomTerminal; use tokio::select; @@ -103,6 +109,12 @@ pub enum TuiEvent { Key(KeyEvent), Paste(String), Draw, + AttachImage { + path: PathBuf, + width: u32, + height: u32, + format_label: &'static str, + }, } pub struct Tui { @@ -236,6 +248,29 @@ impl Tui { select! { Some(Ok(event)) = crossterm_events.next() => { match event { + // Detect Ctrl+V to attach an image from the clipboard. 
+ Event::Key(key_event @ KeyEvent { + code: KeyCode::Char('v'), + modifiers: KeyModifiers::CONTROL, + kind: KeyEventKind::Press, + .. + }) => { + match paste_image_to_temp_png() { + Ok((path, info)) => { + yield TuiEvent::AttachImage { + path, + width: info.width, + height: info.height, + format_label: info.encoded_format.label(), + }; + } + Err(_) => { + // Fall back to normal key handling if no image is available. + yield TuiEvent::Key(key_event); + } + } + } + crossterm::event::Event::Key(key_event) => { #[cfg(unix)] if matches!( @@ -261,10 +296,10 @@ impl Tui { } yield TuiEvent::Key(key_event); } - crossterm::event::Event::Resize(_, _) => { + Event::Resize(_, _) => { yield TuiEvent::Draw; } - crossterm::event::Event::Paste(pasted) => { + Event::Paste(pasted) => { yield TuiEvent::Paste(pasted); } _ => {}