fix: apply patch issue when using different cwd (#942)

If you run a codex instance outside of the current working directory
from where you launched the codex binary it won't be able to apply
patches correctly, even if the sandbox policy allows it. This manifests
weird behaviours, such as

* Reading the same filename in the binary working directory, and
overwriting it in the session working directory. e.g. if you have a
`readme` in both folders it will overwrite the readme in the session
working directory with the readme in the binary working directory
*applied with the suggested patch*.
* The LLM ends up in weird loops trying to verify and debug why the
apply_patch won't work, and it can result in it applying patches by
manually writing python or javascript if it figures out that either is
supported by the system instead.

I added a test-case to ensure that the patch contents are based on the
cwd.

## Issue: mixing relative & absolute paths in apply_patch

1. The apply_patch tool use relative paths based on the session working
directory.
2. `unified_diff_from_chunks` eventually ends up [reading the source
file](https://github.com/reflectionai/codex/blob/main/codex-rs/apply-patch/src/lib.rs#L410)
to figure out what the diff is, by using the relative path.
3. The changes are targeted using an absolute path derived from the
current working directory.

The end-result in case session working directory differs from the binary
working directory: we get the diff for a file relative to the binary
working directory, and apply it on a file in the session working
directory.
This commit is contained in:
Sebastian Lund
2025-05-16 12:12:16 -04:00
committed by GitHub
parent 7edfbae062
commit 84e01f4b62
2 changed files with 92 additions and 16 deletions

View File

@@ -18,7 +18,7 @@ use thiserror::Error;
use tree_sitter::Parser;
use tree_sitter_bash::LANGUAGE as BASH;
#[derive(Debug, Error)]
#[derive(Debug, Error, PartialEq)]
pub enum ApplyPatchError {
#[error(transparent)]
ParseError(#[from] ParseError),
@@ -46,6 +46,12 @@ pub struct IoError {
source: std::io::Error,
}
impl PartialEq for IoError {
fn eq(&self, other: &Self) -> bool {
self.context == other.context && self.source.to_string() == other.source.to_string()
}
}
#[derive(Debug)]
pub enum MaybeApplyPatch {
Body(Vec<Hunk>),
@@ -77,7 +83,7 @@ pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
}
}
#[derive(Debug)]
#[derive(Debug, PartialEq)]
pub enum ApplyPatchFileChange {
Add {
content: String,
@@ -106,7 +112,27 @@ pub enum MaybeApplyPatchVerified {
NotApplyPatch,
}
#[derive(Debug)]
impl PartialEq for MaybeApplyPatchVerified {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(MaybeApplyPatchVerified::Body(a), MaybeApplyPatchVerified::Body(b)) => a == b,
(
MaybeApplyPatchVerified::ShellParseError(a),
MaybeApplyPatchVerified::ShellParseError(b),
) => a.to_string() == b.to_string(),
(
MaybeApplyPatchVerified::CorrectnessError(a),
MaybeApplyPatchVerified::CorrectnessError(b),
) => a == b,
(MaybeApplyPatchVerified::NotApplyPatch, MaybeApplyPatchVerified::NotApplyPatch) => {
true
}
_ => false,
}
}
}
#[derive(Debug, PartialEq)]
/// ApplyPatchAction is the result of parsing an `apply_patch` command. By
/// construction, all paths should be absolute paths.
pub struct ApplyPatchAction {
@@ -142,22 +168,16 @@ pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApp
MaybeApplyPatch::Body(hunks) => {
let mut changes = HashMap::new();
for hunk in hunks {
let path = hunk.resolve_path(cwd);
match hunk {
Hunk::AddFile { path, contents } => {
changes.insert(
cwd.join(path),
ApplyPatchFileChange::Add {
content: contents.clone(),
},
);
Hunk::AddFile { contents, .. } => {
changes.insert(path, ApplyPatchFileChange::Add { content: contents });
}
Hunk::DeleteFile { path } => {
changes.insert(cwd.join(path), ApplyPatchFileChange::Delete);
Hunk::DeleteFile { .. } => {
changes.insert(path, ApplyPatchFileChange::Delete);
}
Hunk::UpdateFile {
path,
move_path,
chunks,
move_path, chunks, ..
} => {
let ApplyPatchFileUpdate {
unified_diff,
@@ -169,7 +189,7 @@ pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApp
}
};
changes.insert(
cwd.join(path),
path,
ApplyPatchFileChange::Update {
unified_diff,
move_path: move_path.map(|p| cwd.join(p)),
@@ -1137,4 +1157,48 @@ g
"#
);
}
#[test]
fn test_apply_patch_should_resolve_absolute_paths_in_cwd() {
let session_dir = tempdir().unwrap();
let relative_path = "source.txt";
// Note that we need this file to exist for the patch to be "verified"
// and parsed correctly.
let session_file_path = session_dir.path().join(relative_path);
fs::write(&session_file_path, "session directory content\n").unwrap();
let argv = vec![
"apply_patch".to_string(),
r#"*** Begin Patch
*** Update File: source.txt
@@
-session directory content
+updated session directory content
*** End Patch"#
.to_string(),
];
let result = maybe_parse_apply_patch_verified(&argv, session_dir.path());
// Verify the patch contents - as otherwise we may have pulled contents
// from the wrong file (as we're using relative paths)
assert_eq!(
result,
MaybeApplyPatchVerified::Body(ApplyPatchAction {
changes: HashMap::from([(
session_dir.path().join(relative_path),
ApplyPatchFileChange::Update {
unified_diff: r#"@@ -1 +1 @@
-session directory content
+updated session directory content
"#
.to_string(),
move_path: None,
new_content: "updated session directory content\n".to_string(),
},
)]),
})
);
}
}

View File

@@ -22,6 +22,7 @@
//!
//! The parser below is a little more lenient than the explicit spec and allows for
//! leading/trailing whitespace around patch markers.
use std::path::Path;
use std::path::PathBuf;
use thiserror::Error;
@@ -64,6 +65,17 @@ pub enum Hunk {
chunks: Vec<UpdateFileChunk>,
},
}
impl Hunk {
pub fn resolve_path(&self, cwd: &Path) -> PathBuf {
match self {
Hunk::AddFile { path, .. } => cwd.join(path),
Hunk::DeleteFile { path } => cwd.join(path),
Hunk::UpdateFile { path, .. } => cwd.join(path),
}
}
}
use Hunk::*;
#[derive(Debug, PartialEq)]