From 84e01f4b6215dd2067447475a85a9a42b0211bb8 Mon Sep 17 00:00:00 2001 From: Sebastian Lund Date: Fri, 16 May 2025 12:12:16 -0400 Subject: [PATCH] fix: apply patch issue when using different cwd (#942) If you run a codex instance outside of the current working directory from where you launched the codex binary it won't be able to apply patches correctly, even if the sandbox policy allows it. This manifests weird behaviours, such as * Reading the same filename in the binary working directory, and overwriting it in the session working directory. e.g. if you have a `readme` in both folders it will overwrite the readme in the session working directory with the readme in the binary working directory *applied with the suggested patch*. * The LLM ends up in weird loops trying to verify and debug why the apply_patch won't work, and it can result in it applying patches by manually writing python or javascript if it figures out that either is supported by the system instead. I added a test-case to ensure that the patch contents are based on the cwd. ## Issue: mixing relative & absolute paths in apply_patch 1. The apply_patch tool use relative paths based on the session working directory. 2. `unified_diff_from_chunks` eventually ends up [reading the source file](https://github.com/reflectionai/codex/blob/main/codex-rs/apply-patch/src/lib.rs#L410) to figure out what the diff is, by using the relative path. 3. The changes are targeted using an absolute path derived from the current working directory. The end-result in case session working directory differs from the binary working directory: we get the diff for a file relative to the binary working directory, and apply it on a file in the session working directory. --- codex-rs/apply-patch/src/lib.rs | 96 +++++++++++++++++++++++++----- codex-rs/apply-patch/src/parser.rs | 12 ++++ 2 files changed, 92 insertions(+), 16 deletions(-) diff --git a/codex-rs/apply-patch/src/lib.rs b/codex-rs/apply-patch/src/lib.rs index afabcea4..a144f0b4 100644 --- a/codex-rs/apply-patch/src/lib.rs +++ b/codex-rs/apply-patch/src/lib.rs @@ -18,7 +18,7 @@ use thiserror::Error; use tree_sitter::Parser; use tree_sitter_bash::LANGUAGE as BASH; -#[derive(Debug, Error)] +#[derive(Debug, Error, PartialEq)] pub enum ApplyPatchError { #[error(transparent)] ParseError(#[from] ParseError), @@ -46,6 +46,12 @@ pub struct IoError { source: std::io::Error, } +impl PartialEq for IoError { + fn eq(&self, other: &Self) -> bool { + self.context == other.context && self.source.to_string() == other.source.to_string() + } +} + #[derive(Debug)] pub enum MaybeApplyPatch { Body(Vec), @@ -77,7 +83,7 @@ pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch { } } -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum ApplyPatchFileChange { Add { content: String, @@ -106,7 +112,27 @@ pub enum MaybeApplyPatchVerified { NotApplyPatch, } -#[derive(Debug)] +impl PartialEq for MaybeApplyPatchVerified { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (MaybeApplyPatchVerified::Body(a), MaybeApplyPatchVerified::Body(b)) => a == b, + ( + MaybeApplyPatchVerified::ShellParseError(a), + MaybeApplyPatchVerified::ShellParseError(b), + ) => a.to_string() == b.to_string(), + ( + MaybeApplyPatchVerified::CorrectnessError(a), + MaybeApplyPatchVerified::CorrectnessError(b), + ) => a == b, + (MaybeApplyPatchVerified::NotApplyPatch, MaybeApplyPatchVerified::NotApplyPatch) => { + true + } + _ => false, + } + } +} + +#[derive(Debug, PartialEq)] /// ApplyPatchAction is the result of parsing an `apply_patch` command. By /// construction, all paths should be absolute paths. pub struct ApplyPatchAction { @@ -142,22 +168,16 @@ pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApp MaybeApplyPatch::Body(hunks) => { let mut changes = HashMap::new(); for hunk in hunks { + let path = hunk.resolve_path(cwd); match hunk { - Hunk::AddFile { path, contents } => { - changes.insert( - cwd.join(path), - ApplyPatchFileChange::Add { - content: contents.clone(), - }, - ); + Hunk::AddFile { contents, .. } => { + changes.insert(path, ApplyPatchFileChange::Add { content: contents }); } - Hunk::DeleteFile { path } => { - changes.insert(cwd.join(path), ApplyPatchFileChange::Delete); + Hunk::DeleteFile { .. } => { + changes.insert(path, ApplyPatchFileChange::Delete); } Hunk::UpdateFile { - path, - move_path, - chunks, + move_path, chunks, .. } => { let ApplyPatchFileUpdate { unified_diff, @@ -169,7 +189,7 @@ pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApp } }; changes.insert( - cwd.join(path), + path, ApplyPatchFileChange::Update { unified_diff, move_path: move_path.map(|p| cwd.join(p)), @@ -1137,4 +1157,48 @@ g "# ); } + + #[test] + fn test_apply_patch_should_resolve_absolute_paths_in_cwd() { + let session_dir = tempdir().unwrap(); + let relative_path = "source.txt"; + + // Note that we need this file to exist for the patch to be "verified" + // and parsed correctly. + let session_file_path = session_dir.path().join(relative_path); + fs::write(&session_file_path, "session directory content\n").unwrap(); + + let argv = vec![ + "apply_patch".to_string(), + r#"*** Begin Patch +*** Update File: source.txt +@@ +-session directory content ++updated session directory content +*** End Patch"# + .to_string(), + ]; + + let result = maybe_parse_apply_patch_verified(&argv, session_dir.path()); + + // Verify the patch contents - as otherwise we may have pulled contents + // from the wrong file (as we're using relative paths) + assert_eq!( + result, + MaybeApplyPatchVerified::Body(ApplyPatchAction { + changes: HashMap::from([( + session_dir.path().join(relative_path), + ApplyPatchFileChange::Update { + unified_diff: r#"@@ -1 +1 @@ +-session directory content ++updated session directory content +"# + .to_string(), + move_path: None, + new_content: "updated session directory content\n".to_string(), + }, + )]), + }) + ); + } } diff --git a/codex-rs/apply-patch/src/parser.rs b/codex-rs/apply-patch/src/parser.rs index 40547b42..391255de 100644 --- a/codex-rs/apply-patch/src/parser.rs +++ b/codex-rs/apply-patch/src/parser.rs @@ -22,6 +22,7 @@ //! //! The parser below is a little more lenient than the explicit spec and allows for //! leading/trailing whitespace around patch markers. +use std::path::Path; use std::path::PathBuf; use thiserror::Error; @@ -64,6 +65,17 @@ pub enum Hunk { chunks: Vec, }, } + +impl Hunk { + pub fn resolve_path(&self, cwd: &Path) -> PathBuf { + match self { + Hunk::AddFile { path, .. } => cwd.join(path), + Hunk::DeleteFile { path } => cwd.join(path), + Hunk::UpdateFile { path, .. } => cwd.join(path), + } + } +} + use Hunk::*; #[derive(Debug, PartialEq)]