From 1f3318c1c5b83e29b6d6e61eb2bf7d599581023e Mon Sep 17 00:00:00 2001
From: Gabriel Peal
Date: Mon, 4 Aug 2025 08:57:04 -0700
Subject: [PATCH] Add a TurnDiffTracker to create a unified diff for an entire turn (#1770)

This lets us show an accumulating diff across all patches in a turn. Refer to the docs for TurnDiffTracker for implementation details.

There are multiple ways this could have been done, and this felt like the right tradeoff between reliability and completeness:

*Pros*
* It will pick up all changes to files that the model touched, including changes made by prettier or another command that rewrites them.
* It will not pick up changes made by the user or other agents to files it didn't modify.

*Cons*
* It will pick up changes that the user made to a file that the model also touched.
* It will not pick up changes to codegen output or to files that were not modified with apply_patch.
---
 codex-rs/Cargo.lock | 1 +
 codex-rs/core/Cargo.toml | 1 +
 codex-rs/core/src/codex.rs | 111 ++-
 codex-rs/core/src/lib.rs | 1 +
 codex-rs/core/src/protocol.rs | 7 +
 codex-rs/core/src/turn_diff_tracker.rs | 887 ++++++++++++++++++
 .../src/event_processor_with_human_output.rs | 6 +
 codex-rs/mcp-server/src/codex_tool_runner.rs | 1 +
 codex-rs/mcp-server/src/conversation_loop.rs | 1 +
 9 files changed, 998 insertions(+), 18 deletions(-)
 create mode 100644 codex-rs/core/src/turn_diff_tracker.rs

diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index 7d4e41d0..eb4eccd8 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -699,6 +699,7 @@ dependencies = [
  "serde_json",
  "sha1",
  "shlex",
+ "similar",
  "strum_macros 0.27.2",
  "tempfile",
  "thiserror 2.0.12",
diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml
index db3fd4f8..466e9adf 100644
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -34,6 +34,7 @@ serde_json = "1"
 serde_bytes = "0.11"
 sha1 = "0.10.6"
 shlex = "1.3.0"
+similar = "2.7.0"
 strum_macros = "0.27.2"
 thiserror = "2.0.12"
 time = { version = "0.3", features = ["formatting", "local-offset", "macros"] }
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index 7004dcfc..568d87c4 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -85,11 +85,13 @@ use crate::protocol::SandboxPolicy;
 use crate::protocol::SessionConfiguredEvent;
 use crate::protocol::Submission;
 use crate::protocol::TaskCompleteEvent;
+use crate::protocol::TurnDiffEvent;
 use crate::rollout::RolloutRecorder;
 use crate::safety::SafetyCheck;
 use crate::safety::assess_command_safety;
 use crate::safety::assess_safety_for_untrusted_command;
 use crate::shell;
+use crate::turn_diff_tracker::TurnDiffTracker;
 use crate::user_notification::UserNotification;
 use crate::util::backoff;
@@ -362,7 +364,11 @@ impl Session {
         }
     }
-    async fn notify_exec_command_begin(&self, exec_command_context: ExecCommandContext) {
+    async fn on_exec_command_begin(
+        &self,
+        turn_diff_tracker: &mut TurnDiffTracker,
+        exec_command_context: ExecCommandContext,
+    ) {
         let ExecCommandContext {
             sub_id,
             call_id,
@@ -374,11 +380,15 @@ impl Session {
             Some(ApplyPatchCommandContext {
                 user_explicitly_approved_this_action,
                 changes,
-            }) => EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
-                call_id,
-                auto_approved: !user_explicitly_approved_this_action,
-                changes,
-            }),
+            }) => {
+                turn_diff_tracker.on_patch_begin(&changes);
+
+                EventMsg::PatchApplyBegin(PatchApplyBeginEvent {
+                    call_id,
+                    auto_approved: !user_explicitly_approved_this_action,
+                    changes,
+                })
+            }
             None => EventMsg::ExecCommandBegin(ExecCommandBeginEvent {
                 call_id,
                 command: command_for_display.clone(),
@@ -392,8 +402,10 @@ impl Session {
         let _ = self.tx_event.send(event).await;
     }
-    async fn notify_exec_command_end(
+    #[allow(clippy::too_many_arguments)]
+    async fn on_exec_command_end(
         &self,
+        turn_diff_tracker: &mut TurnDiffTracker,
         sub_id: &str,
         call_id: &str,
         output: &ExecToolCallOutput,
@@ -433,6 +445,20 @@ impl Session {
             msg,
         };
         let _ = self.tx_event.send(event).await;
+
+        // If this is an apply_patch, after we emit the end patch, emit a second event
+        // with the full turn diff if there is one.
+        if is_apply_patch {
+            let unified_diff = turn_diff_tracker.get_unified_diff();
+            if let Ok(Some(unified_diff)) = unified_diff {
+                let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
+                let event = Event {
+                    id: sub_id.into(),
+                    msg,
+                };
+                let _ = self.tx_event.send(event).await;
+            }
+        }
     }

     /// Helper that emits a BackgroundEvent with the given message. This keeps
@@ -1006,6 +1032,10 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
         .await;
     let last_agent_message: Option<String>;
+    // Although from the perspective of codex.rs, TurnDiffTracker has the lifecycle of a Task which contains
+    // many turns, from the perspective of the user, it is a single turn.
+    let mut turn_diff_tracker = TurnDiffTracker::new();
+
     loop {
         // Note that pending_input would be something like a message the user
         // submitted through the UI while the model was running. Though the UI
@@ -1037,7 +1067,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {
                 })
             })
             .collect();
-        match run_turn(&sess, sub_id.clone(), turn_input).await {
+        match run_turn(&sess, &mut turn_diff_tracker, sub_id.clone(), turn_input).await {
             Ok(turn_output) => {
                 let mut items_to_record_in_conversation_history = Vec::<ResponseItem>::new();
                 let mut responses = Vec::<ResponseInputItem>::new();
@@ -1163,6 +1193,7 @@ async fn run_task(sess: Arc<Session>, sub_id: String, input: Vec<InputItem>) {

 async fn run_turn(
     sess: &Session,
+    turn_diff_tracker: &mut TurnDiffTracker,
     sub_id: String,
     input: Vec<ResponseItem>,
 ) -> CodexResult<Vec<ProcessedResponseItem>> {
@@ -1177,7 +1208,7 @@ async fn run_turn(
     let mut retries = 0;
     loop {
-        match try_run_turn(sess, &sub_id, &prompt).await {
+        match try_run_turn(sess, turn_diff_tracker, &sub_id, &prompt).await {
             Ok(output) => return Ok(output),
             Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
             Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
@@ -1223,6 +1254,7 @@ struct ProcessedResponseItem {

 async fn try_run_turn(
     sess: &Session,
+    turn_diff_tracker: &mut TurnDiffTracker,
     sub_id: &str,
     prompt: &Prompt,
 ) -> CodexResult<Vec<ProcessedResponseItem>> {
@@ -1310,7 +1342,8 @@ async fn try_run_turn(
             match event {
                 ResponseEvent::Created => {}
                 ResponseEvent::OutputItemDone(item) => {
-                    let response = handle_response_item(sess, sub_id, item.clone()).await?;
+                    let response =
+                        handle_response_item(sess, turn_diff_tracker, sub_id, item.clone()).await?;

                     output.push(ProcessedResponseItem { item, response });
                 }
@@ -1328,6 +1361,16 @@ async fn try_run_turn(
                         .ok();
                 }

+                    let unified_diff = turn_diff_tracker.get_unified_diff();
+                    if let Ok(Some(unified_diff)) = unified_diff {
+                        let msg = EventMsg::TurnDiff(TurnDiffEvent { unified_diff });
+                        let event = Event {
+                            id: sub_id.to_string(),
+                            msg,
+                        };
+                        let _ = sess.tx_event.send(event).await;
+                    }
+
                     return Ok(output);
                 }
                 ResponseEvent::OutputTextDelta(delta) => {
@@ -1432,6 +1475,7 @@ async fn run_compact_task(

 async fn handle_response_item(
     sess: &Session,
+    turn_diff_tracker: &mut TurnDiffTracker,
     sub_id: &str,
     item: ResponseItem,
 ) -> CodexResult<Option<ResponseInputItem>> {
@@ -1469,7 +1513,17 @@
             ..
} => { info!("FunctionCall: {arguments}"); - Some(handle_function_call(sess, sub_id.to_string(), name, arguments, call_id).await) + Some( + handle_function_call( + sess, + turn_diff_tracker, + sub_id.to_string(), + name, + arguments, + call_id, + ) + .await, + ) } ResponseItem::LocalShellCall { id, @@ -1504,6 +1558,7 @@ async fn handle_response_item( handle_container_exec_with_params( exec_params, sess, + turn_diff_tracker, sub_id.to_string(), effective_call_id, ) @@ -1521,6 +1576,7 @@ async fn handle_response_item( async fn handle_function_call( sess: &Session, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: String, name: String, arguments: String, @@ -1534,7 +1590,8 @@ async fn handle_function_call( return *output; } }; - handle_container_exec_with_params(params, sess, sub_id, call_id).await + handle_container_exec_with_params(params, sess, turn_diff_tracker, sub_id, call_id) + .await } "update_plan" => handle_update_plan(sess, arguments, sub_id, call_id).await, _ => { @@ -1608,6 +1665,7 @@ fn maybe_run_with_user_profile(params: ExecParams, sess: &Session) -> ExecParams async fn handle_container_exec_with_params( params: ExecParams, sess: &Session, + turn_diff_tracker: &mut TurnDiffTracker, sub_id: String, call_id: String, ) -> ResponseInputItem { @@ -1755,7 +1813,7 @@ async fn handle_container_exec_with_params( }, ), }; - sess.notify_exec_command_begin(exec_command_context.clone()) + sess.on_exec_command_begin(turn_diff_tracker, exec_command_context.clone()) .await; let params = maybe_run_with_user_profile(params, sess); @@ -1782,7 +1840,8 @@ async fn handle_container_exec_with_params( duration, } = &output; - sess.notify_exec_command_end( + sess.on_exec_command_end( + turn_diff_tracker, &sub_id, &call_id, &output, @@ -1806,7 +1865,15 @@ async fn handle_container_exec_with_params( } } Err(CodexErr::Sandbox(error)) => { - handle_sandbox_error(params, exec_command_context, error, sandbox_type, sess).await + handle_sandbox_error( + turn_diff_tracker, + params, + exec_command_context, + error, + sandbox_type, + sess, + ) + .await } Err(e) => { // Handle non-sandbox errors @@ -1822,6 +1889,7 @@ async fn handle_container_exec_with_params( } async fn handle_sandbox_error( + turn_diff_tracker: &mut TurnDiffTracker, params: ExecParams, exec_command_context: ExecCommandContext, error: SandboxErr, @@ -1878,7 +1946,8 @@ async fn handle_sandbox_error( sess.notify_background_event(&sub_id, "retrying command without sandbox") .await; - sess.notify_exec_command_begin(exec_command_context).await; + sess.on_exec_command_begin(turn_diff_tracker, exec_command_context) + .await; // This is an escalated retry; the policy will not be // examined and the sandbox has been set to `None`. 
@@ -1905,8 +1974,14 @@ async fn handle_sandbox_error(
         duration,
     } = &retry_output;
-    sess.notify_exec_command_end(&sub_id, &call_id, &retry_output, is_apply_patch)
-        .await;
+    sess.on_exec_command_end(
+        turn_diff_tracker,
+        &sub_id,
+        &call_id,
+        &retry_output,
+        is_apply_patch,
+    )
+    .await;

     let is_success = *exit_code == 0;
     let content = format_exec_output(
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 80f90149..4f083d9e 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -42,6 +42,7 @@ pub(crate) mod safety;
 pub mod seatbelt;
 pub mod shell;
 pub mod spawn;
+pub mod turn_diff_tracker;
 mod user_notification;
 pub mod util;
diff --git a/codex-rs/core/src/protocol.rs b/codex-rs/core/src/protocol.rs
index cbb211d9..82591a2c 100644
--- a/codex-rs/core/src/protocol.rs
+++ b/codex-rs/core/src/protocol.rs
@@ -387,6 +387,8 @@ pub enum EventMsg {
     /// Notification that a patch application has finished.
     PatchApplyEnd(PatchApplyEndEvent),

+    TurnDiff(TurnDiffEvent),
+
     /// Response to GetHistoryEntryRequest.
     GetHistoryEntryResponse(GetHistoryEntryResponseEvent),

@@ -598,6 +600,11 @@ pub struct PatchApplyEndEvent {
     pub success: bool,
 }

+#[derive(Debug, Clone, Deserialize, Serialize)]
+pub struct TurnDiffEvent {
+    pub unified_diff: String,
+}
+
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct GetHistoryEntryResponseEvent {
     pub offset: usize,
diff --git a/codex-rs/core/src/turn_diff_tracker.rs b/codex-rs/core/src/turn_diff_tracker.rs
new file mode 100644
index 00000000..7026d7bb
--- /dev/null
+++ b/codex-rs/core/src/turn_diff_tracker.rs
@@ -0,0 +1,887 @@
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::process::Command;
+
+use anyhow::Context;
+use anyhow::Result;
+use anyhow::anyhow;
+use sha1::digest::Output;
+use uuid::Uuid;
+
+use crate::protocol::FileChange;
+
+const ZERO_OID: &str = "0000000000000000000000000000000000000000";
+const DEV_NULL: &str = "/dev/null";
+
+struct BaselineFileInfo {
+    path: PathBuf,
+    content: Vec<u8>,
+    mode: FileMode,
+    oid: String,
+}
+
+/// Tracks sets of changes to files and exposes the overall unified diff.
+/// Internally, the way this works is now:
+/// 1. Maintain an in-memory baseline snapshot of files when they are first seen.
+///    For new additions, do not create a baseline so that diffs are shown as proper additions (using /dev/null).
+/// 2. Keep a stable internal filename (uuid) per external path for rename tracking.
+/// 3. To compute the aggregated unified diff, compare each baseline snapshot to the current file on disk entirely in-memory
+///    using the `similar` crate and emit unified diffs with rewritten external paths.
+#[derive(Default)]
+pub struct TurnDiffTracker {
+    /// Map external path -> internal filename (uuid).
+    external_to_temp_name: HashMap<PathBuf, String>,
+    /// Internal filename -> baseline file info.
+    baseline_file_info: HashMap<String, BaselineFileInfo>,
+    /// Internal filename -> external path as of current accumulated state (after applying all changes).
+    /// This is where renames are tracked.
+    temp_name_to_current_path: HashMap<String, PathBuf>,
+    /// Cache of known git worktree roots to avoid repeated filesystem walks.
+    git_root_cache: Vec<PathBuf>,
+}
+
+impl TurnDiffTracker {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Front-run apply patch calls to track the starting contents of any modified files.
+    /// - Creates an in-memory baseline snapshot for files that already exist on disk when first seen.
+    /// - For additions, we intentionally do not create a baseline snapshot so that diffs are proper additions.
+    /// - Also updates internal mappings for move/rename events.
+    pub fn on_patch_begin(&mut self, changes: &HashMap<PathBuf, FileChange>) {
+        for (path, change) in changes.iter() {
+            // Ensure a stable internal filename exists for this external path.
+            if !self.external_to_temp_name.contains_key(path) {
+                let internal = Uuid::new_v4().to_string();
+                self.external_to_temp_name
+                    .insert(path.clone(), internal.clone());
+                self.temp_name_to_current_path
+                    .insert(internal.clone(), path.clone());
+
+                // If the file exists on disk now, snapshot as baseline; else leave missing to represent /dev/null.
+                let baseline_file_info = if path.exists() {
+                    let mode = file_mode_for_path(path);
+                    let mode_val = mode.unwrap_or(FileMode::Regular);
+                    let content = blob_bytes(path, &mode_val).unwrap_or_default();
+                    let oid = if mode == Some(FileMode::Symlink) {
+                        format!("{:x}", git_blob_sha1_hex_bytes(&content))
+                    } else {
+                        self.git_blob_oid_for_path(path)
+                            .unwrap_or_else(|| format!("{:x}", git_blob_sha1_hex_bytes(&content)))
+                    };
+                    Some(BaselineFileInfo {
+                        path: path.clone(),
+                        content,
+                        mode: mode_val,
+                        oid,
+                    })
+                } else {
+                    Some(BaselineFileInfo {
+                        path: path.clone(),
+                        content: vec![],
+                        mode: FileMode::Regular,
+                        oid: ZERO_OID.to_string(),
+                    })
+                };
+
+                if let Some(baseline_file_info) = baseline_file_info {
+                    self.baseline_file_info
+                        .insert(internal.clone(), baseline_file_info);
+                }
+            }
+
+            // Track rename/move in current mapping if provided in an Update.
+            if let FileChange::Update {
+                move_path: Some(dest),
+                ..
+            } = change
+            {
+                let uuid_filename = match self.external_to_temp_name.get(path) {
+                    Some(i) => i.clone(),
+                    None => {
+                        // This should be rare, but if we haven't mapped the source, create it with no baseline.
+                        let i = Uuid::new_v4().to_string();
+                        self.baseline_file_info.insert(
+                            i.clone(),
+                            BaselineFileInfo {
+                                path: path.clone(),
+                                content: vec![],
+                                mode: FileMode::Regular,
+                                oid: ZERO_OID.to_string(),
+                            },
+                        );
+                        i
+                    }
+                };
+                // Update current external mapping for temp file name.
+                self.temp_name_to_current_path
+                    .insert(uuid_filename.clone(), dest.clone());
+                // Update forward file_mapping: external current -> internal name.
+                self.external_to_temp_name.remove(path);
+                self.external_to_temp_name
+                    .insert(dest.clone(), uuid_filename);
+            };
+        }
+    }
+
+    fn get_path_for_internal(&self, internal: &str) -> Option<PathBuf> {
+        self.temp_name_to_current_path
+            .get(internal)
+            .cloned()
+            .or_else(|| {
+                self.baseline_file_info
+                    .get(internal)
+                    .map(|info| info.path.clone())
+            })
+    }
+
+    /// Find the git worktree root for a file/directory by walking up to the first ancestor containing a `.git` entry.
+    /// Uses a simple cache of known roots and avoids negative-result caching for simplicity.
+    fn find_git_root_cached(&mut self, start: &Path) -> Option<PathBuf> {
+        let dir = if start.is_dir() {
+            start
+        } else {
+            start.parent()?
+        };
+
+        // Fast path: if any cached root is an ancestor of this path, use it.
+        if let Some(root) = self
+            .git_root_cache
+            .iter()
+            .find(|r| dir.starts_with(r))
+            .cloned()
+        {
+            return Some(root);
+        }
+
+        // Walk up to find a `.git` marker.
+        let mut cur = dir.to_path_buf();
+        loop {
+            let git_marker = cur.join(".git");
+            if git_marker.is_dir() || git_marker.is_file() {
+                if !self.git_root_cache.iter().any(|r| r == &cur) {
+                    self.git_root_cache.push(cur.clone());
+                }
+                return Some(cur);
+            }
+
+            // On Windows, avoid walking above the drive or UNC share root.
+            #[cfg(windows)]
+            {
+                if is_windows_drive_or_unc_root(&cur) {
+                    return None;
+                }
+            }
+
+            if let Some(parent) = cur.parent() {
+                cur = parent.to_path_buf();
+            } else {
+                return None;
+            }
+        }
+    }
+
+    /// Return a display string for `path` relative to its git root if found, else absolute.
+    fn relative_to_git_root_str(&mut self, path: &Path) -> String {
+        let s = if let Some(root) = self.find_git_root_cached(path) {
+            if let Ok(rel) = path.strip_prefix(&root) {
+                rel.display().to_string()
+            } else {
+                path.display().to_string()
+            }
+        } else {
+            path.display().to_string()
+        };
+        s.replace('\\', "/")
+    }
+
+    /// Ask git to compute the blob SHA-1 for the file at `path` within its repository.
+    /// Returns None if no repository is found or git invocation fails.
+    fn git_blob_oid_for_path(&mut self, path: &Path) -> Option<String> {
+        let root = self.find_git_root_cached(path)?;
+        // Compute a path relative to the repo root for better portability across platforms.
+        let rel = path.strip_prefix(&root).unwrap_or(path);
+        let output = Command::new("git")
+            .arg("-C")
+            .arg(&root)
+            .arg("hash-object")
+            .arg("--")
+            .arg(rel)
+            .output()
+            .ok()?;
+        if !output.status.success() {
+            return None;
+        }
+        let s = String::from_utf8_lossy(&output.stdout).trim().to_string();
+        if s.len() == 40 { Some(s) } else { None }
+    }
+
+    /// Recompute the aggregated unified diff by comparing all of the in-memory snapshots that were
+    /// collected before the first time they were touched by apply_patch during this turn with
+    /// the current repo state.
+    pub fn get_unified_diff(&mut self) -> Result<Option<String>> {
+        let mut aggregated = String::new();
+
+        // Compute diffs per tracked internal file in a stable order by external path.
+        let mut baseline_file_names: Vec<String> =
+            self.baseline_file_info.keys().cloned().collect();
+        // Sort lexicographically by full repo-relative path to match git behavior.
+        baseline_file_names.sort_by_key(|internal| {
+            self.get_path_for_internal(internal)
+                .map(|p| self.relative_to_git_root_str(&p))
+                .unwrap_or_default()
+        });
+
+        for internal in baseline_file_names {
+            aggregated.push_str(self.get_file_diff(&internal).as_str());
+            if !aggregated.ends_with('\n') {
+                aggregated.push('\n');
+            }
+        }
+
+        if aggregated.trim().is_empty() {
+            Ok(None)
+        } else {
+            Ok(Some(aggregated))
+        }
+    }
+
+    fn get_file_diff(&mut self, internal_file_name: &str) -> String {
+        let mut aggregated = String::new();
+
+        // Snapshot lightweight fields only.
+        let (baseline_external_path, baseline_mode, left_oid) = {
+            if let Some(info) = self.baseline_file_info.get(internal_file_name) {
+                (info.path.clone(), info.mode, info.oid.clone())
+            } else {
+                (PathBuf::new(), FileMode::Regular, ZERO_OID.to_string())
+            }
+        };
+        let current_external_path = match self.get_path_for_internal(internal_file_name) {
+            Some(p) => p,
+            None => return aggregated,
+        };
+
+        let current_mode = file_mode_for_path(&current_external_path).unwrap_or(FileMode::Regular);
+        let right_bytes = blob_bytes(&current_external_path, &current_mode);
+
+        // Compute displays with &mut self before borrowing any baseline content.
+        let left_display = self.relative_to_git_root_str(&baseline_external_path);
+        let right_display = self.relative_to_git_root_str(&current_external_path);
+
+        // Compute right oid before borrowing baseline content.
+        let right_oid = if let Some(b) = right_bytes.as_ref() {
+            if current_mode == FileMode::Symlink {
+                format!("{:x}", git_blob_sha1_hex_bytes(b))
+            } else {
+                self.git_blob_oid_for_path(&current_external_path)
+                    .unwrap_or_else(|| format!("{:x}", git_blob_sha1_hex_bytes(b)))
+            }
+        } else {
+            ZERO_OID.to_string()
+        };
+
+        // Borrow baseline content only after all &mut self uses are done.
+        let left_present = left_oid.as_str() != ZERO_OID;
+        let left_bytes: Option<&[u8]> = if left_present {
+            self.baseline_file_info
+                .get(internal_file_name)
+                .map(|i| i.content.as_slice())
+        } else {
+            None
+        };
+
+        // Fast path: identical bytes or both missing.
+        if left_bytes == right_bytes.as_deref() {
+            return aggregated;
+        }
+
+        aggregated.push_str(&format!("diff --git a/{left_display} b/{right_display}\n"));
+
+        let is_add = !left_present && right_bytes.is_some();
+        let is_delete = left_present && right_bytes.is_none();
+
+        if is_add {
+            aggregated.push_str(&format!("new file mode {current_mode}\n"));
+        } else if is_delete {
+            aggregated.push_str(&format!("deleted file mode {baseline_mode}\n"));
+        } else if baseline_mode != current_mode {
+            aggregated.push_str(&format!("old mode {baseline_mode}\n"));
+            aggregated.push_str(&format!("new mode {current_mode}\n"));
+        }
+
+        let left_text = left_bytes.and_then(|b| std::str::from_utf8(b).ok());
+        let right_text = right_bytes
+            .as_deref()
+            .and_then(|b| std::str::from_utf8(b).ok());
+
+        let can_text_diff = matches!(
+            (left_text, right_text, is_add, is_delete),
+            (Some(_), Some(_), _, _) | (_, Some(_), true, _) | (Some(_), _, _, true)
+        );
+
+        if can_text_diff {
+            let l = left_text.unwrap_or("");
+            let r = right_text.unwrap_or("");
+
+            aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+
+            let old_header = if left_present {
+                format!("a/{left_display}")
+            } else {
+                DEV_NULL.to_string()
+            };
+            let new_header = if right_bytes.is_some() {
+                format!("b/{right_display}")
+            } else {
+                DEV_NULL.to_string()
+            };
+
+            let diff = similar::TextDiff::from_lines(l, r);
+            let unified = diff
+                .unified_diff()
+                .context_radius(3)
+                .header(&old_header, &new_header)
+                .to_string();
+
+            aggregated.push_str(&unified);
+        } else {
+            aggregated.push_str(&format!("index {left_oid}..{right_oid}\n"));
+            let old_header = if left_present {
+                format!("a/{left_display}")
+            } else {
+                DEV_NULL.to_string()
+            };
+            let new_header = if right_bytes.is_some() {
+                format!("b/{right_display}")
+            } else {
+                DEV_NULL.to_string()
+            };
+            aggregated.push_str(&format!("--- {old_header}\n"));
+            aggregated.push_str(&format!("+++ {new_header}\n"));
+            aggregated.push_str("Binary files differ\n");
+        }
+        aggregated
+    }
+}
+
+/// Compute the Git SHA-1 blob object ID for the given content (bytes).
+fn git_blob_sha1_hex_bytes(data: &[u8]) -> Output<sha1::Sha1> {
+    // Git blob hash is sha1 of: "blob <len>\0<data>"
+    let header = format!("blob {}\0", data.len());
+    use sha1::Digest;
+    let mut hasher = sha1::Sha1::new();
+    hasher.update(header.as_bytes());
+    hasher.update(data);
+    hasher.finalize()
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum FileMode {
+    Regular,
+    #[cfg(unix)]
+    Executable,
+    Symlink,
+}
+
+impl FileMode {
+    fn as_str(&self) -> &'static str {
+        match self {
+            FileMode::Regular => "100644",
+            #[cfg(unix)]
+            FileMode::Executable => "100755",
+            FileMode::Symlink => "120000",
+        }
+    }
+}
+
+impl std::fmt::Display for FileMode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+#[cfg(unix)]
+fn file_mode_for_path(path: &Path) -> Option<FileMode> {
+    use std::os::unix::fs::PermissionsExt;
+    let meta = fs::symlink_metadata(path).ok()?;
+    let ft = meta.file_type();
+    if ft.is_symlink() {
+        return Some(FileMode::Symlink);
+    }
+    let mode = meta.permissions().mode();
+    let is_exec = (mode & 0o111) != 0;
+    Some(if is_exec {
+        FileMode::Executable
+    } else {
+        FileMode::Regular
+    })
+}
+
+#[cfg(not(unix))]
+fn file_mode_for_path(_path: &Path) -> Option<FileMode> {
+    // Default to non-executable on non-unix.
+    Some(FileMode::Regular)
+}
+
+fn blob_bytes(path: &Path, mode: &FileMode) -> Option<Vec<u8>> {
+    if path.exists() {
+        let contents = if *mode == FileMode::Symlink {
+            symlink_blob_bytes(path)
+                .ok_or_else(|| anyhow!("failed to read symlink target for {}", path.display()))
+        } else {
+            fs::read(path)
+                .with_context(|| format!("failed to read current file for diff {}", path.display()))
+        };
+        contents.ok()
+    } else {
+        None
+    }
+}
+
+#[cfg(unix)]
+fn symlink_blob_bytes(path: &Path) -> Option<Vec<u8>> {
+    use std::os::unix::ffi::OsStrExt;
+    let target = std::fs::read_link(path).ok()?;
+    Some(target.as_os_str().as_bytes().to_vec())
+}
+
+#[cfg(not(unix))]
+fn symlink_blob_bytes(_path: &Path) -> Option<Vec<u8>> {
+    None
+}
+
+#[cfg(windows)]
+fn is_windows_drive_or_unc_root(p: &std::path::Path) -> bool {
+    use std::path::Component;
+    let mut comps = p.components();
+    matches!(
+        (comps.next(), comps.next(), comps.next()),
+        (Some(Component::Prefix(_)), Some(Component::RootDir), None)
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    #![allow(clippy::unwrap_used)]
+    use super::*;
+    use pretty_assertions::assert_eq;
+    use tempfile::tempdir;
+
+    /// Compute the Git SHA-1 blob object ID for the given content (string).
+    /// This delegates to the bytes version to avoid UTF-8 lossy conversions here.
+    fn git_blob_sha1_hex(data: &str) -> String {
+        format!("{:x}", git_blob_sha1_hex_bytes(data.as_bytes()))
+    }
+
+    fn normalize_diff_for_test(input: &str, root: &Path) -> String {
+        let root_str = root.display().to_string().replace('\\', "/");
+        let replaced = input.replace(&root_str, "");
+        // Split into blocks on lines starting with "diff --git ", sort blocks for determinism, and rejoin
+        let mut blocks: Vec<String> = Vec::new();
+        let mut current = String::new();
+        for line in replaced.lines() {
+            if line.starts_with("diff --git ") && !current.is_empty() {
+                blocks.push(current);
+                current = String::new();
+            }
+            if !current.is_empty() {
+                current.push('\n');
+            }
+            current.push_str(line);
+        }
+        if !current.is_empty() {
+            blocks.push(current);
+        }
+        blocks.sort();
+        let mut out = blocks.join("\n");
+        if !out.ends_with('\n') {
+            out.push('\n');
+        }
+        out
+    }
+
+    #[test]
+    fn accumulates_add_and_update() {
+        let mut acc = TurnDiffTracker::new();
+
+        let dir = tempdir().unwrap();
+        let file = dir.path().join("a.txt");
+
+        // First patch: add file (baseline should be /dev/null).
+        let add_changes = HashMap::from([(
+            file.clone(),
+            FileChange::Add {
+                content: "foo\n".to_string(),
+            },
+        )]);
+        acc.on_patch_begin(&add_changes);
+
+        // Simulate apply: create the file on disk.
+        fs::write(&file, "foo\n").unwrap();
+        let first = acc.get_unified_diff().unwrap().unwrap();
+        let first = normalize_diff_for_test(&first, dir.path());
+        let expected_first = {
+            let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
+            let right_oid = git_blob_sha1_hex("foo\n");
+            format!(
+                r#"diff --git a//a.txt b//a.txt
+new file mode {mode}
+index {ZERO_OID}..{right_oid}
+--- {DEV_NULL}
++++ b//a.txt
+@@ -0,0 +1 @@
++foo
+"#,
+            )
+        };
+        assert_eq!(first, expected_first);
+
+        // Second patch: update the file on disk.
+        let update_changes = HashMap::from([(
+            file.clone(),
+            FileChange::Update {
+                unified_diff: "".to_owned(),
+                move_path: None,
+            },
+        )]);
+        acc.on_patch_begin(&update_changes);
+
+        // Simulate apply: append a new line.
+        fs::write(&file, "foo\nbar\n").unwrap();
+        let combined = acc.get_unified_diff().unwrap().unwrap();
+        let combined = normalize_diff_for_test(&combined, dir.path());
+        let expected_combined = {
+            let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
+            let right_oid = git_blob_sha1_hex("foo\nbar\n");
+            format!(
+                r#"diff --git a//a.txt b//a.txt
+new file mode {mode}
+index {ZERO_OID}..{right_oid}
+--- {DEV_NULL}
++++ b//a.txt
+@@ -0,0 +1,2 @@
++foo
++bar
+"#,
+            )
+        };
+        assert_eq!(combined, expected_combined);
+    }
+
+    #[test]
+    fn accumulates_delete() {
+        let dir = tempdir().unwrap();
+        let file = dir.path().join("b.txt");
+        fs::write(&file, "x\n").unwrap();
+
+        let mut acc = TurnDiffTracker::new();
+        let del_changes = HashMap::from([(file.clone(), FileChange::Delete)]);
+        acc.on_patch_begin(&del_changes);
+
+        // Simulate apply: delete the file from disk.
+        let baseline_mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular);
+        fs::remove_file(&file).unwrap();
+        let diff = acc.get_unified_diff().unwrap().unwrap();
+        let diff = normalize_diff_for_test(&diff, dir.path());
+        let expected = {
+            let left_oid = git_blob_sha1_hex("x\n");
+            format!(
+                r#"diff --git a//b.txt b//b.txt
+deleted file mode {baseline_mode}
+index {left_oid}..{ZERO_OID}
+--- a//b.txt
++++ {DEV_NULL}
+@@ -1 +0,0 @@
+-x
+"#,
+            )
+        };
+        assert_eq!(diff, expected);
+    }
+
+    #[test]
+    fn accumulates_move_and_update() {
+        let dir = tempdir().unwrap();
+        let src = dir.path().join("src.txt");
+        let dest = dir.path().join("dst.txt");
+        fs::write(&src, "line\n").unwrap();
+
+        let mut acc = TurnDiffTracker::new();
+        let mv_changes = HashMap::from([(
+            src.clone(),
+            FileChange::Update {
+                unified_diff: "".to_owned(),
+                move_path: Some(dest.clone()),
+            },
+        )]);
+        acc.on_patch_begin(&mv_changes);
+
+        // Simulate apply: move and update content.
+        fs::rename(&src, &dest).unwrap();
+        fs::write(&dest, "line2\n").unwrap();
+
+        let out = acc.get_unified_diff().unwrap().unwrap();
+        let out = normalize_diff_for_test(&out, dir.path());
+        let expected = {
+            let left_oid = git_blob_sha1_hex("line\n");
+            let right_oid = git_blob_sha1_hex("line2\n");
+            format!(
+                r#"diff --git a//src.txt b//dst.txt
+index {left_oid}..{right_oid}
+--- a//src.txt
++++ b//dst.txt
+@@ -1 +1 @@
+-line
++line2
+"#
+            )
+        };
+        assert_eq!(out, expected);
+    }
+
+    #[test]
+    fn move_without_change_yields_no_diff() {
+        let dir = tempdir().unwrap();
+        let src = dir.path().join("moved.txt");
+        let dest = dir.path().join("renamed.txt");
+        fs::write(&src, "same\n").unwrap();
+
+        let mut acc = TurnDiffTracker::new();
+        let mv_changes = HashMap::from([(
+            src.clone(),
+            FileChange::Update {
+                unified_diff: "".to_owned(),
+                move_path: Some(dest.clone()),
+            },
+        )]);
+        acc.on_patch_begin(&mv_changes);
+
+        // Simulate apply: move only, no content change.
+        fs::rename(&src, &dest).unwrap();
+
+        let diff = acc.get_unified_diff().unwrap();
+        assert_eq!(diff, None);
+    }
+
+    #[test]
+    fn move_declared_but_file_only_appears_at_dest_is_add() {
+        let dir = tempdir().unwrap();
+        let src = dir.path().join("src.txt");
+        let dest = dir.path().join("dest.txt");
+        let mut acc = TurnDiffTracker::new();
+        let mv = HashMap::from([(
+            src.clone(),
+            FileChange::Update {
+                unified_diff: "".into(),
+                move_path: Some(dest.clone()),
+            },
+        )]);
+        acc.on_patch_begin(&mv);
+        // No file existed initially; create only dest
+        fs::write(&dest, "hello\n").unwrap();
+        let diff = acc.get_unified_diff().unwrap().unwrap();
+        let diff = normalize_diff_for_test(&diff, dir.path());
+        let expected = {
+            let mode = file_mode_for_path(&dest).unwrap_or(FileMode::Regular);
+            let right_oid = git_blob_sha1_hex("hello\n");
+            format!(
+                r#"diff --git a//src.txt b//dest.txt
+new file mode {mode}
+index {ZERO_OID}..{right_oid}
+--- {DEV_NULL}
++++ b//dest.txt
+@@ -0,0 +1 @@
++hello
+"#,
+            )
+        };
+        assert_eq!(diff, expected);
+    }
+
+    #[test]
+    fn update_persists_across_new_baseline_for_new_file() {
+        let dir = tempdir().unwrap();
+        let a = dir.path().join("a.txt");
+        let b = dir.path().join("b.txt");
+        fs::write(&a, "foo\n").unwrap();
+        fs::write(&b, "z\n").unwrap();
+
+        let mut acc = TurnDiffTracker::new();
+
+        // First: update existing a.txt (baseline snapshot is created for a).
+ let update_a = HashMap::from([( + a.clone(), + FileChange::Update { + unified_diff: "".to_owned(), + move_path: None, + }, + )]); + acc.on_patch_begin(&update_a); + // Simulate apply: modify a.txt on disk. + fs::write(&a, "foo\nbar\n").unwrap(); + let first = acc.get_unified_diff().unwrap().unwrap(); + let first = normalize_diff_for_test(&first, dir.path()); + let expected_first = { + let left_oid = git_blob_sha1_hex("foo\n"); + let right_oid = git_blob_sha1_hex("foo\nbar\n"); + format!( + r#"diff --git a//a.txt b//a.txt +index {left_oid}..{right_oid} +--- a//a.txt ++++ b//a.txt +@@ -1 +1,2 @@ + foo ++bar +"# + ) + }; + assert_eq!(first, expected_first); + + // Next: introduce a brand-new path b.txt into baseline snapshots via a delete change. + let del_b = HashMap::from([(b.clone(), FileChange::Delete)]); + acc.on_patch_begin(&del_b); + // Simulate apply: delete b.txt. + let baseline_mode = file_mode_for_path(&b).unwrap_or(FileMode::Regular); + fs::remove_file(&b).unwrap(); + + let combined = acc.get_unified_diff().unwrap().unwrap(); + let combined = normalize_diff_for_test(&combined, dir.path()); + let expected = { + let left_oid_a = git_blob_sha1_hex("foo\n"); + let right_oid_a = git_blob_sha1_hex("foo\nbar\n"); + let left_oid_b = git_blob_sha1_hex("z\n"); + format!( + r#"diff --git a//a.txt b//a.txt +index {left_oid_a}..{right_oid_a} +--- a//a.txt ++++ b//a.txt +@@ -1 +1,2 @@ + foo ++bar +diff --git a//b.txt b//b.txt +deleted file mode {baseline_mode} +index {left_oid_b}..{ZERO_OID} +--- a//b.txt ++++ {DEV_NULL} +@@ -1 +0,0 @@ +-z +"#, + ) + }; + assert_eq!(combined, expected); + } + + #[test] + fn binary_files_differ_update() { + let dir = tempdir().unwrap(); + let file = dir.path().join("bin.dat"); + + // Initial non-UTF8 bytes + let left_bytes: Vec = vec![0xff, 0xfe, 0xfd, 0x00]; + // Updated non-UTF8 bytes + let right_bytes: Vec = vec![0x01, 0x02, 0x03, 0x00]; + + fs::write(&file, &left_bytes).unwrap(); + + let mut acc = TurnDiffTracker::new(); + let update_changes = HashMap::from([( + file.clone(), + FileChange::Update { + unified_diff: "".to_owned(), + move_path: None, + }, + )]); + acc.on_patch_begin(&update_changes); + + // Apply update on disk + fs::write(&file, &right_bytes).unwrap(); + + let diff = acc.get_unified_diff().unwrap().unwrap(); + let diff = normalize_diff_for_test(&diff, dir.path()); + let expected = { + let left_oid = format!("{:x}", git_blob_sha1_hex_bytes(&left_bytes)); + let right_oid = format!("{:x}", git_blob_sha1_hex_bytes(&right_bytes)); + format!( + r#"diff --git a//bin.dat b//bin.dat +index {left_oid}..{right_oid} +--- a//bin.dat ++++ b//bin.dat +Binary files differ +"# + ) + }; + assert_eq!(diff, expected); + } + + #[test] + fn filenames_with_spaces_add_and_update() { + let mut acc = TurnDiffTracker::new(); + + let dir = tempdir().unwrap(); + let file = dir.path().join("name with spaces.txt"); + + // First patch: add file (baseline should be /dev/null). + let add_changes = HashMap::from([( + file.clone(), + FileChange::Add { + content: "foo\n".to_string(), + }, + )]); + acc.on_patch_begin(&add_changes); + + // Simulate apply: create the file on disk. 
+ fs::write(&file, "foo\n").unwrap(); + let first = acc.get_unified_diff().unwrap().unwrap(); + let first = normalize_diff_for_test(&first, dir.path()); + let expected_first = { + let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular); + let right_oid = git_blob_sha1_hex("foo\n"); + format!( + r#"diff --git a//name with spaces.txt b//name with spaces.txt +new file mode {mode} +index {ZERO_OID}..{right_oid} +--- {DEV_NULL} ++++ b//name with spaces.txt +@@ -0,0 +1 @@ ++foo +"#, + ) + }; + assert_eq!(first, expected_first); + + // Second patch: update the file on disk. + let update_changes = HashMap::from([( + file.clone(), + FileChange::Update { + unified_diff: "".to_owned(), + move_path: None, + }, + )]); + acc.on_patch_begin(&update_changes); + + // Simulate apply: append a new line with a space. + fs::write(&file, "foo\nbar baz\n").unwrap(); + let combined = acc.get_unified_diff().unwrap().unwrap(); + let combined = normalize_diff_for_test(&combined, dir.path()); + let expected_combined = { + let mode = file_mode_for_path(&file).unwrap_or(FileMode::Regular); + let right_oid = git_blob_sha1_hex("foo\nbar baz\n"); + format!( + r#"diff --git a//name with spaces.txt b//name with spaces.txt +new file mode {mode} +index {ZERO_OID}..{right_oid} +--- {DEV_NULL} ++++ b//name with spaces.txt +@@ -0,0 +1,2 @@ ++foo ++bar baz +"#, + ) + }; + assert_eq!(combined, expected_combined); + } +} diff --git a/codex-rs/exec/src/event_processor_with_human_output.rs b/codex-rs/exec/src/event_processor_with_human_output.rs index 72e2f929..c290d933 100644 --- a/codex-rs/exec/src/event_processor_with_human_output.rs +++ b/codex-rs/exec/src/event_processor_with_human_output.rs @@ -20,6 +20,7 @@ use codex_core::protocol::PatchApplyEndEvent; use codex_core::protocol::SessionConfiguredEvent; use codex_core::protocol::TaskCompleteEvent; use codex_core::protocol::TokenUsage; +use codex_core::protocol::TurnDiffEvent; use owo_colors::OwoColorize; use owo_colors::Style; use shlex::try_join; @@ -399,6 +400,7 @@ impl EventProcessor for EventProcessorWithHumanOutput { stdout, stderr, success, + .. }) => { let patch_begin = self.call_id_to_patch.remove(&call_id); @@ -428,6 +430,10 @@ impl EventProcessor for EventProcessorWithHumanOutput { println!("{}", line.style(self.dimmed)); } } + EventMsg::TurnDiff(TurnDiffEvent { unified_diff }) => { + ts_println!(self, "{}", "turn diff:".style(self.magenta)); + println!("{unified_diff}"); + } EventMsg::ExecApprovalRequest(_) => { // Should we exit? } diff --git a/codex-rs/mcp-server/src/codex_tool_runner.rs b/codex-rs/mcp-server/src/codex_tool_runner.rs index d489ffe0..205dfa46 100644 --- a/codex-rs/mcp-server/src/codex_tool_runner.rs +++ b/codex-rs/mcp-server/src/codex_tool_runner.rs @@ -263,6 +263,7 @@ async fn run_codex_tool_session_inner( | EventMsg::BackgroundEvent(_) | EventMsg::PatchApplyBegin(_) | EventMsg::PatchApplyEnd(_) + | EventMsg::TurnDiff(_) | EventMsg::GetHistoryEntryResponse(_) | EventMsg::PlanUpdate(_) | EventMsg::ShutdownComplete => { diff --git a/codex-rs/mcp-server/src/conversation_loop.rs b/codex-rs/mcp-server/src/conversation_loop.rs index 53427518..1db39a23 100644 --- a/codex-rs/mcp-server/src/conversation_loop.rs +++ b/codex-rs/mcp-server/src/conversation_loop.rs @@ -97,6 +97,7 @@ pub async fn run_conversation_loop( | EventMsg::McpToolCallEnd(_) | EventMsg::ExecCommandBegin(_) | EventMsg::ExecCommandEnd(_) + | EventMsg::TurnDiff(_) | EventMsg::BackgroundEvent(_) | EventMsg::ExecCommandOutputDelta(_) | EventMsg::PatchApplyBegin(_)
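
Reviewer note: to make the intended call pattern concrete, here is a minimal, hypothetical usage sketch (not part of the patch itself) that mirrors what codex.rs now does around apply_patch. The crate paths `codex_core::protocol::FileChange` and `codex_core::turn_diff_tracker::TurnDiffTracker` match the modules touched above; the `example.txt` path and the `main` wrapper are invented for illustration.

use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;

use codex_core::protocol::FileChange;
use codex_core::turn_diff_tracker::TurnDiffTracker;

fn main() -> anyhow::Result<()> {
    let mut tracker = TurnDiffTracker::new();

    // Before apply_patch runs: record baselines for every path the patch will touch.
    let path = PathBuf::from("example.txt");
    let changes = HashMap::from([(
        path.clone(),
        FileChange::Add { content: "hello\n".to_string() },
    )]);
    tracker.on_patch_begin(&changes);

    // apply_patch (or a follow-up formatter) mutates the file on disk.
    fs::write(&path, "hello\n")?;

    // Afterwards, the tracker diffs each baseline against the bytes currently on disk.
    if let Some(unified_diff) = tracker.get_unified_diff()? {
        println!("{unified_diff}");
    }
    Ok(())
}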
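The diff text itself comes from the `similar` crate, as the TurnDiffTracker docs describe: each baseline snapshot is compared in memory against the current file contents and printed with git-style headers. A stripped-down sketch of that core step, using made-up file contents and header names but the same API calls as get_file_diff above:

fn unified_diff_sketch() -> String {
    // Baseline captured at on_patch_begin time vs. bytes on disk at report time.
    let baseline = "foo\n";
    let current = "foo\nbar\n";

    let diff = similar::TextDiff::from_lines(baseline, current);
    diff.unified_diff()
        .context_radius(3)
        .header("a/example.txt", "b/example.txt")
        .to_string()
}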
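For the index lines, the tracker prefers `git hash-object` but falls back to hashing the git blob header plus the raw bytes itself. A small standalone check of that formula against the well-known empty-blob object ID; the `blob_oid` helper below is illustrative and not the private function from the patch:

use sha1::Digest;

fn blob_oid(data: &[u8]) -> String {
    // SHA-1 over "blob {len}\0" followed by the raw bytes, matching git's blob hashing.
    let mut hasher = sha1::Sha1::new();
    hasher.update(format!("blob {}\0", data.len()).as_bytes());
    hasher.update(data);
    format!("{:x}", hasher.finalize())
}

fn main() {
    // `git hash-object` reports this OID for an empty file.
    assert_eq!(blob_oid(b""), "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391");
}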