Phase 1: Repository & Infrastructure Setup
- Renamed directories: codex-rs -> llmx-rs, codex-cli -> llmx-cli
- Updated package.json files:
- Root: llmx-monorepo
- CLI: @llmx/llmx
- SDK: @llmx/llmx-sdk
- Updated pnpm workspace configuration
- Renamed binary: codex.js -> llmx.js
- Updated environment variables: CODEX_* -> LLMX_*
- Changed repository URLs to valknar/llmx
🤖 Generated with Claude Code
This commit is contained in:
1672
llmx-rs/apply-patch/src/lib.rs
Normal file
1672
llmx-rs/apply-patch/src/lib.rs
Normal file
File diff suppressed because it is too large
Load Diff
3
llmx-rs/apply-patch/src/main.rs
Normal file
3
llmx-rs/apply-patch/src/main.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
/// Thin binary entry point: delegates to the library crate's `main`, which
/// never returns (it terminates the process with an exit status).
// NOTE(review): this commit renames codex -> llmx throughout the repo, but the
// crate referenced below is still `codex_apply_patch` — confirm whether the
// crate was intentionally left with the old name.
pub fn main() -> ! {
    codex_apply_patch::main()
}
|
||||
741
llmx-rs/apply-patch/src/parser.rs
Normal file
741
llmx-rs/apply-patch/src/parser.rs
Normal file
@@ -0,0 +1,741 @@
|
||||
//! This module is responsible for parsing & validating a patch into a list of "hunks".
|
||||
//! (It does not attempt to actually check that the patch can be applied to the filesystem.)
|
||||
//!
|
||||
//! The official Lark grammar for the apply-patch format is:
|
||||
//!
|
||||
//! start: begin_patch hunk+ end_patch
|
||||
//! begin_patch: "*** Begin Patch" LF
|
||||
//! end_patch: "*** End Patch" LF?
|
||||
//!
|
||||
//! hunk: add_hunk | delete_hunk | update_hunk
|
||||
//! add_hunk: "*** Add File: " filename LF add_line+
|
||||
//! delete_hunk: "*** Delete File: " filename LF
|
||||
//! update_hunk: "*** Update File: " filename LF change_move? change?
|
||||
//! filename: /(.+)/
|
||||
//! add_line: "+" /(.+)/ LF -> line
|
||||
//!
|
||||
//! change_move: "*** Move to: " filename LF
|
||||
//! change: (change_context | change_line)+ eof_line?
|
||||
//! change_context: ("@@" | "@@ " /(.+)/) LF
|
||||
//! change_line: ("+" | "-" | " ") /(.+)/ LF
|
||||
//! eof_line: "*** End of File" LF
|
||||
//!
|
||||
//! The parser below is a little more lenient than the explicit spec and allows for
|
||||
//! leading/trailing whitespace around patch markers.
|
||||
use crate::ApplyPatchArgs;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
// Literal marker strings from the apply-patch envelope grammar (see the
// module-level Lark grammar above). Envelope boundaries:
const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
const END_PATCH_MARKER: &str = "*** End Patch";
// Hunk headers (each is followed directly by the file path):
const ADD_FILE_MARKER: &str = "*** Add File: ";
const DELETE_FILE_MARKER: &str = "*** Delete File: ";
const UPDATE_FILE_MARKER: &str = "*** Update File: ";
const MOVE_TO_MARKER: &str = "*** Move to: ";
// Marks that an update chunk's old lines must sit at end-of-file.
const EOF_MARKER: &str = "*** End of File";
// Update-chunk context markers: "@@ <context>" and the bare "@@" form.
const CHANGE_CONTEXT_MARKER: &str = "@@ ";
const EMPTY_CHANGE_CONTEXT_MARKER: &str = "@@";

/// Currently, the only OpenAI model that knowingly requires lenient parsing is
/// gpt-4.1. While we could try to require everyone to pass in a strictness
/// param when invoking apply_patch, it is a pain to thread it through all of
/// the call sites, so we resign ourselves allowing lenient parsing for all
/// models. See [`ParseMode::Lenient`] for details on the exceptions we make for
/// gpt-4.1.
const PARSE_IN_STRICT_MODE: bool = false;
|
||||
|
||||
/// Errors produced while parsing and validating a patch envelope.
#[derive(Debug, PartialEq, Error, Clone)]
pub enum ParseError {
    /// The overall envelope is malformed (e.g. missing the
    /// "*** Begin Patch" / "*** End Patch" boundary markers).
    #[error("invalid patch: {0}")]
    InvalidPatchError(String),
    /// A specific hunk inside an otherwise well-formed envelope is invalid.
    /// `line_number` is 1-based, counted from the first line of the patch.
    #[error("invalid hunk at line {line_number}, {message}")]
    InvalidHunkError { message: String, line_number: usize },
}
// Bring the variants into scope so call sites can use them unqualified.
use ParseError::*;
|
||||
|
||||
/// One parsed file operation from a patch: add, delete, or update a file.
#[derive(Debug, PartialEq, Clone)]
#[allow(clippy::enum_variant_names)]
pub enum Hunk {
    /// Create a new file at `path` with `contents`.
    AddFile {
        path: PathBuf,
        /// Full contents of the new file; the parser appends one '\n' per
        /// added line.
        contents: String,
    },
    /// Remove the file at `path`.
    DeleteFile {
        path: PathBuf,
    },
    /// Modify (and optionally rename) the file at `path`.
    UpdateFile {
        path: PathBuf,
        /// Destination path when the hunk carries a "*** Move to: " line.
        move_path: Option<PathBuf>,

        /// Chunks should be in order, i.e. the `change_context` of one chunk
        /// should occur later in the file than the previous chunk.
        chunks: Vec<UpdateFileChunk>,
    },
}
|
||||
|
||||
impl Hunk {
|
||||
pub fn resolve_path(&self, cwd: &Path) -> PathBuf {
|
||||
match self {
|
||||
Hunk::AddFile { path, .. } => cwd.join(path),
|
||||
Hunk::DeleteFile { path } => cwd.join(path),
|
||||
Hunk::UpdateFile { path, .. } => cwd.join(path),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
use Hunk::*;
|
||||
|
||||
/// A single contiguous edit within an `UpdateFile` hunk.
#[derive(Debug, PartialEq, Clone)]
pub struct UpdateFileChunk {
    /// A single line of context used to narrow down the position of the chunk
    /// (this is usually a class, method, or function definition.)
    pub change_context: Option<String>,

    /// A contiguous block of lines that should be replaced with `new_lines`.
    /// `old_lines` must occur strictly after `change_context`.
    pub old_lines: Vec<String>,
    /// Replacement lines for `old_lines`.
    pub new_lines: Vec<String>,

    /// If set to true, `old_lines` must occur at the end of the source file.
    /// (Tolerance around trailing newlines should be encouraged.)
    pub is_end_of_file: bool,
}
|
||||
|
||||
pub fn parse_patch(patch: &str) -> Result<ApplyPatchArgs, ParseError> {
|
||||
let mode = if PARSE_IN_STRICT_MODE {
|
||||
ParseMode::Strict
|
||||
} else {
|
||||
ParseMode::Lenient
|
||||
};
|
||||
parse_patch_text(patch, mode)
|
||||
}
|
||||
|
||||
/// Controls how tolerant [`parse_patch_text`] is of wrappers around the patch
/// envelope.
enum ParseMode {
    /// Parse the patch text argument as is.
    Strict,

    /// GPT-4.1 is known to formulate the `command` array for the `local_shell`
    /// tool call for `apply_patch` call using something like the following:
    ///
    /// ```json
    /// [
    ///     "apply_patch",
    ///     "<<'EOF'\n*** Begin Patch\n*** Update File: README.md\n@@...\n*** End Patch\nEOF\n",
    /// ]
    /// ```
    ///
    /// This is a problem because `local_shell` is a bit of a misnomer: the
    /// `command` is not invoked by passing the arguments to a shell like Bash,
    /// but are invoked using something akin to `execvpe(3)`.
    ///
    /// This is significant in this case because where a shell would interpret
    /// `<<'EOF'...` as a heredoc and pass the contents via stdin (which is
    /// fine, as `apply_patch` is specified to read from stdin if no argument is
    /// passed), `execvpe(3)` interprets the heredoc as a literal string. To get
    /// the `local_shell` tool to run a command the way shell would, the
    /// `command` array must be something like:
    ///
    /// ```json
    /// [
    ///     "bash",
    ///     "-lc",
    ///     "apply_patch <<'EOF'\n*** Begin Patch\n*** Update File: README.md\n@@...\n*** End Patch\nEOF\n",
    /// ]
    /// ```
    ///
    /// In lenient mode, we check if the argument to `apply_patch` starts with
    /// `<<'EOF'` and ends with `EOF\n`. If so, we strip off these markers,
    /// trim() the result, and treat what is left as the patch text.
    Lenient,
}
|
||||
|
||||
/// Shared implementation behind [`parse_patch`]: validates the envelope
/// boundaries (optionally unwrapping a literal heredoc wrapper in lenient
/// mode), then parses the interior lines into [`Hunk`]s.
///
/// The returned [`ApplyPatchArgs::patch`] is the (possibly unwrapped) patch
/// text re-joined with '\n'; `workdir` is always `None` here.
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result<ApplyPatchArgs, ParseError> {
    let lines: Vec<&str> = patch.trim().lines().collect();
    // In lenient mode a failed strict boundary check gets a second chance:
    // the heredoc wrapper is stripped and the inner lines are re-checked.
    let lines: &[&str] = match check_patch_boundaries_strict(&lines) {
        Ok(()) => &lines,
        Err(e) => match mode {
            ParseMode::Strict => {
                return Err(e);
            }
            ParseMode::Lenient => check_patch_boundaries_lenient(&lines, e)?,
        },
    };

    let mut hunks: Vec<Hunk> = Vec::new();
    // The above checks ensure that lines.len() >= 2.
    let last_line_index = lines.len().saturating_sub(1);
    // Skip the "*** Begin Patch" / "*** End Patch" boundary lines.
    let mut remaining_lines = &lines[1..last_line_index];
    // 1-based line number of the first hunk line (line 1 is the begin marker).
    let mut line_number = 2;
    while !remaining_lines.is_empty() {
        let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number)?;
        hunks.push(hunk);
        line_number += hunk_lines;
        remaining_lines = &remaining_lines[hunk_lines..]
    }
    let patch = lines.join("\n");
    Ok(ApplyPatchArgs {
        hunks,
        patch,
        workdir: None,
    })
}
|
||||
|
||||
/// Checks the start and end lines of the patch text for `apply_patch`,
|
||||
/// returning an error if they do not match the expected markers.
|
||||
fn check_patch_boundaries_strict(lines: &[&str]) -> Result<(), ParseError> {
|
||||
let (first_line, last_line) = match lines {
|
||||
[] => (None, None),
|
||||
[first] => (Some(first), Some(first)),
|
||||
[first, .., last] => (Some(first), Some(last)),
|
||||
};
|
||||
check_start_and_end_lines_strict(first_line, last_line)
|
||||
}
|
||||
|
||||
/// If we are in lenient mode, we check if the first line starts with `<<EOF`
|
||||
/// (possibly quoted) and the last line ends with `EOF`. There must be at least
|
||||
/// 4 lines total because the heredoc markers take up 2 lines and the patch text
|
||||
/// must have at least 2 lines.
|
||||
///
|
||||
/// If successful, returns the lines of the patch text that contain the patch
|
||||
/// contents, excluding the heredoc markers.
|
||||
fn check_patch_boundaries_lenient<'a>(
|
||||
original_lines: &'a [&'a str],
|
||||
original_parse_error: ParseError,
|
||||
) -> Result<&'a [&'a str], ParseError> {
|
||||
match original_lines {
|
||||
[first, .., last] => {
|
||||
if (first == &"<<EOF" || first == &"<<'EOF'" || first == &"<<\"EOF\"")
|
||||
&& last.ends_with("EOF")
|
||||
&& original_lines.len() >= 4
|
||||
{
|
||||
let inner_lines = &original_lines[1..original_lines.len() - 1];
|
||||
match check_patch_boundaries_strict(inner_lines) {
|
||||
Ok(()) => Ok(inner_lines),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
} else {
|
||||
Err(original_parse_error)
|
||||
}
|
||||
}
|
||||
_ => Err(original_parse_error),
|
||||
}
|
||||
}
|
||||
|
||||
fn check_start_and_end_lines_strict(
|
||||
first_line: Option<&&str>,
|
||||
last_line: Option<&&str>,
|
||||
) -> Result<(), ParseError> {
|
||||
match (first_line, last_line) {
|
||||
(Some(&first), Some(&last)) if first == BEGIN_PATCH_MARKER && last == END_PATCH_MARKER => {
|
||||
Ok(())
|
||||
}
|
||||
(Some(&first), _) if first != BEGIN_PATCH_MARKER => Err(InvalidPatchError(String::from(
|
||||
"The first line of the patch must be '*** Begin Patch'",
|
||||
))),
|
||||
_ => Err(InvalidPatchError(String::from(
|
||||
"The last line of the patch must be '*** End Patch'",
|
||||
))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Attempts to parse a single hunk from the start of lines.
/// Returns the parsed hunk and the number of lines parsed (or a ParseError).
///
/// Callers must pass a non-empty `lines` slice; `line_number` is the 1-based
/// position of `lines[0]` within the overall patch, used for error reporting.
fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
    // Be tolerant of case mismatches and extra padding around marker strings.
    // NOTE(review): trim() handles the padding, but strip_prefix below is
    // case-sensitive, so the "case mismatches" claim looks aspirational —
    // confirm.
    let first_line = lines[0].trim();
    if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {
        // Add File: consume every subsequent "+"-prefixed line as content.
        let mut contents = String::new();
        let mut parsed_lines = 1;
        for add_line in &lines[1..] {
            if let Some(line_to_add) = add_line.strip_prefix('+') {
                contents.push_str(line_to_add);
                contents.push('\n');
                parsed_lines += 1;
            } else {
                break;
            }
        }
        return Ok((
            AddFile {
                path: PathBuf::from(path),
                contents,
            },
            parsed_lines,
        ));
    } else if let Some(path) = first_line.strip_prefix(DELETE_FILE_MARKER) {
        // Delete File: a single header line, no body.
        return Ok((
            DeleteFile {
                path: PathBuf::from(path),
            },
            1,
        ));
    } else if let Some(path) = first_line.strip_prefix(UPDATE_FILE_MARKER) {
        // Update File: optional "*** Move to:" line followed by 1+ chunks.
        let mut remaining_lines = &lines[1..];
        let mut parsed_lines = 1;

        // Optional: move file line
        let move_path = remaining_lines
            .first()
            .and_then(|x| x.strip_prefix(MOVE_TO_MARKER));

        if move_path.is_some() {
            remaining_lines = &remaining_lines[1..];
            parsed_lines += 1;
        }

        let mut chunks = Vec::new();
        // NOTE: we need to know to stop once we reach the next special marker header.
        while !remaining_lines.is_empty() {
            // Skip over any completely blank lines that may separate chunks.
            if remaining_lines[0].trim().is_empty() {
                parsed_lines += 1;
                remaining_lines = &remaining_lines[1..];
                continue;
            }

            // Any "***" header (next hunk or End Patch) terminates this hunk.
            if remaining_lines[0].starts_with("***") {
                break;
            }

            // Only the first chunk may omit its "@@" context marker.
            let (chunk, chunk_lines) = parse_update_file_chunk(
                remaining_lines,
                line_number + parsed_lines,
                chunks.is_empty(),
            )?;
            chunks.push(chunk);
            parsed_lines += chunk_lines;
            remaining_lines = &remaining_lines[chunk_lines..]
        }

        if chunks.is_empty() {
            return Err(InvalidHunkError {
                message: format!("Update file hunk for path '{path}' is empty"),
                line_number,
            });
        }

        return Ok((
            UpdateFile {
                path: PathBuf::from(path),
                move_path: move_path.map(PathBuf::from),
                chunks,
            },
            parsed_lines,
        ));
    }

    // None of the three hunk headers matched.
    Err(InvalidHunkError {
        message: format!(
            "'{first_line}' is not a valid hunk header. Valid hunk headers: '*** Add File: {{path}}', '*** Delete File: {{path}}', '*** Update File: {{path}}'"
        ),
        line_number,
    })
}
|
||||
|
||||
/// Parses a single update chunk — an optional `@@`/`@@ <context>` marker
/// followed by ' ' (context), '+' (added), and '-' (removed) diff lines —
/// from the start of `lines`.
///
/// `allow_missing_context` lets the first chunk of an update hunk start
/// directly with diff lines (no `@@` marker). Returns the parsed chunk and
/// the total number of input lines consumed (marker line included).
fn parse_update_file_chunk(
    lines: &[&str],
    line_number: usize,
    allow_missing_context: bool,
) -> Result<(UpdateFileChunk, usize), ParseError> {
    if lines.is_empty() {
        return Err(InvalidHunkError {
            message: "Update hunk does not contain any lines".to_string(),
            line_number,
        });
    }
    // If we see an explicit context marker @@ or @@ <context>, consume it; otherwise, optionally
    // allow treating the chunk as starting directly with diff lines.
    let (change_context, start_index) = if lines[0] == EMPTY_CHANGE_CONTEXT_MARKER {
        (None, 1)
    } else if let Some(context) = lines[0].strip_prefix(CHANGE_CONTEXT_MARKER) {
        (Some(context.to_string()), 1)
    } else {
        if !allow_missing_context {
            return Err(InvalidHunkError {
                message: format!(
                    "Expected update hunk to start with a @@ context marker, got: '{}'",
                    lines[0]
                ),
                line_number,
            });
        }
        (None, 0)
    };
    // A lone context marker with nothing after it is not a valid chunk.
    if start_index >= lines.len() {
        return Err(InvalidHunkError {
            message: "Update hunk does not contain any lines".to_string(),
            line_number: line_number + 1,
        });
    }
    let mut chunk = UpdateFileChunk {
        change_context,
        old_lines: Vec::new(),
        new_lines: Vec::new(),
        is_end_of_file: false,
    };
    let mut parsed_lines = 0;
    for line in &lines[start_index..] {
        match *line {
            // "*** End of File" closes the chunk and flags EOF anchoring.
            EOF_MARKER => {
                if parsed_lines == 0 {
                    return Err(InvalidHunkError {
                        message: "Update hunk does not contain any lines".to_string(),
                        line_number: line_number + 1,
                    });
                }
                chunk.is_end_of_file = true;
                parsed_lines += 1;
                break;
            }
            line_contents => {
                // Dispatch on the diff marker character. Slicing with [1..] is
                // safe: the matched markers are all single-byte ASCII.
                match line_contents.chars().next() {
                    None => {
                        // Interpret this as an empty line.
                        chunk.old_lines.push(String::new());
                        chunk.new_lines.push(String::new());
                    }
                    Some(' ') => {
                        chunk.old_lines.push(line_contents[1..].to_string());
                        chunk.new_lines.push(line_contents[1..].to_string());
                    }
                    Some('+') => {
                        chunk.new_lines.push(line_contents[1..].to_string());
                    }
                    Some('-') => {
                        chunk.old_lines.push(line_contents[1..].to_string());
                    }
                    _ => {
                        if parsed_lines == 0 {
                            return Err(InvalidHunkError {
                                message: format!(
                                    "Unexpected line found in update hunk: '{line_contents}'. Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)"
                                ),
                                line_number: line_number + 1,
                            });
                        }
                        // Assume this is the start of the next hunk.
                        break;
                    }
                }
                parsed_lines += 1;
            }
        }
    }

    Ok((chunk, parsed_lines + start_index))
}
|
||||
|
||||
// End-to-end coverage of envelope validation and all three hunk kinds in
// strict mode, including an update chunk with no explicit @@ marker.
#[test]
fn test_parse_patch() {
    assert_eq!(
        parse_patch_text("bad", ParseMode::Strict),
        Err(InvalidPatchError(
            "The first line of the patch must be '*** Begin Patch'".to_string()
        ))
    );
    assert_eq!(
        parse_patch_text("*** Begin Patch\nbad", ParseMode::Strict),
        Err(InvalidPatchError(
            "The last line of the patch must be '*** End Patch'".to_string()
        ))
    );
    assert_eq!(
        parse_patch_text(
            "*** Begin Patch\n\
             *** Update File: test.py\n\
             *** End Patch",
            ParseMode::Strict
        ),
        Err(InvalidHunkError {
            message: "Update file hunk for path 'test.py' is empty".to_string(),
            line_number: 2,
        })
    );
    // An envelope with no hunks at all is valid and yields an empty list.
    assert_eq!(
        parse_patch_text(
            "*** Begin Patch\n\
             *** End Patch",
            ParseMode::Strict
        )
        .unwrap()
        .hunks,
        Vec::new()
    );
    assert_eq!(
        parse_patch_text(
            "*** Begin Patch\n\
             *** Add File: path/add.py\n\
             +abc\n\
             +def\n\
             *** Delete File: path/delete.py\n\
             *** Update File: path/update.py\n\
             *** Move to: path/update2.py\n\
             @@ def f():\n\
             - pass\n\
             + return 123\n\
             *** End Patch",
            ParseMode::Strict
        )
        .unwrap()
        .hunks,
        vec![
            AddFile {
                path: PathBuf::from("path/add.py"),
                contents: "abc\ndef\n".to_string()
            },
            DeleteFile {
                path: PathBuf::from("path/delete.py")
            },
            UpdateFile {
                path: PathBuf::from("path/update.py"),
                move_path: Some(PathBuf::from("path/update2.py")),
                chunks: vec![UpdateFileChunk {
                    change_context: Some("def f():".to_string()),
                    old_lines: vec![" pass".to_string()],
                    new_lines: vec![" return 123".to_string()],
                    is_end_of_file: false
                }]
            }
        ]
    );
    // Update hunk followed by another hunk (Add File).
    assert_eq!(
        parse_patch_text(
            "*** Begin Patch\n\
             *** Update File: file.py\n\
             @@\n\
             +line\n\
             *** Add File: other.py\n\
             +content\n\
             *** End Patch",
            ParseMode::Strict
        )
        .unwrap()
        .hunks,
        vec![
            UpdateFile {
                path: PathBuf::from("file.py"),
                move_path: None,
                chunks: vec![UpdateFileChunk {
                    change_context: None,
                    old_lines: vec![],
                    new_lines: vec!["line".to_string()],
                    is_end_of_file: false
                }],
            },
            AddFile {
                path: PathBuf::from("other.py"),
                contents: "content\n".to_string()
            }
        ]
    );

    // Update hunk without an explicit @@ header for the first chunk should parse.
    // Use a raw string to preserve the leading space diff marker on the context line.
    assert_eq!(
        parse_patch_text(
            r#"*** Begin Patch
*** Update File: file2.py
 import foo
+bar
*** End Patch"#,
            ParseMode::Strict
        )
        .unwrap()
        .hunks,
        vec![UpdateFile {
            path: PathBuf::from("file2.py"),
            move_path: None,
            chunks: vec![UpdateFileChunk {
                change_context: None,
                old_lines: vec!["import foo".to_string()],
                new_lines: vec!["import foo".to_string(), "bar".to_string()],
                is_end_of_file: false,
            }],
        }]
    );
}
|
||||
|
||||
// Verifies lenient-mode unwrapping of literal heredoc wrappers (unquoted,
// single-quoted, and double-quoted), and that mismatched quotes or a broken
// inner envelope still fail.
#[test]
fn test_parse_patch_lenient() {
    let patch_text = r#"*** Begin Patch
*** Update File: file2.py
 import foo
+bar
*** End Patch"#;
    let expected_patch = vec![UpdateFile {
        path: PathBuf::from("file2.py"),
        move_path: None,
        chunks: vec![UpdateFileChunk {
            change_context: None,
            old_lines: vec!["import foo".to_string()],
            new_lines: vec!["import foo".to_string(), "bar".to_string()],
            is_end_of_file: false,
        }],
    }];
    let expected_error =
        InvalidPatchError("The first line of the patch must be '*** Begin Patch'".to_string());

    let patch_text_in_heredoc = format!("<<EOF\n{patch_text}\nEOF\n");
    assert_eq!(
        parse_patch_text(&patch_text_in_heredoc, ParseMode::Strict),
        Err(expected_error.clone())
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_heredoc, ParseMode::Lenient),
        Ok(ApplyPatchArgs {
            hunks: expected_patch.clone(),
            patch: patch_text.to_string(),
            workdir: None,
        })
    );

    let patch_text_in_single_quoted_heredoc = format!("<<'EOF'\n{patch_text}\nEOF\n");
    assert_eq!(
        parse_patch_text(&patch_text_in_single_quoted_heredoc, ParseMode::Strict),
        Err(expected_error.clone())
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_single_quoted_heredoc, ParseMode::Lenient),
        Ok(ApplyPatchArgs {
            hunks: expected_patch.clone(),
            patch: patch_text.to_string(),
            workdir: None,
        })
    );

    let patch_text_in_double_quoted_heredoc = format!("<<\"EOF\"\n{patch_text}\nEOF\n");
    assert_eq!(
        parse_patch_text(&patch_text_in_double_quoted_heredoc, ParseMode::Strict),
        Err(expected_error.clone())
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_double_quoted_heredoc, ParseMode::Lenient),
        Ok(ApplyPatchArgs {
            hunks: expected_patch,
            patch: patch_text.to_string(),
            workdir: None,
        })
    );

    // Mismatched quotes are not recognized as a heredoc marker.
    let patch_text_in_mismatched_quotes_heredoc = format!("<<\"EOF'\n{patch_text}\nEOF\n");
    assert_eq!(
        parse_patch_text(&patch_text_in_mismatched_quotes_heredoc, ParseMode::Strict),
        Err(expected_error.clone())
    );
    assert_eq!(
        parse_patch_text(&patch_text_in_mismatched_quotes_heredoc, ParseMode::Lenient),
        Err(expected_error.clone())
    );

    // Heredoc unwraps, but the inner envelope lacks "*** End Patch".
    let patch_text_with_missing_closing_heredoc =
        "<<EOF\n*** Begin Patch\n*** Update File: file2.py\nEOF\n".to_string();
    assert_eq!(
        parse_patch_text(&patch_text_with_missing_closing_heredoc, ParseMode::Strict),
        Err(expected_error)
    );
    assert_eq!(
        parse_patch_text(&patch_text_with_missing_closing_heredoc, ParseMode::Lenient),
        Err(InvalidPatchError(
            "The last line of the patch must be '*** End Patch'".to_string()
        ))
    );
}
|
||||
|
||||
// Covers the "unrecognized hunk header" error path of parse_one_hunk.
#[test]
fn test_parse_one_hunk() {
    assert_eq!(
        parse_one_hunk(&["bad"], 234),
        Err(InvalidHunkError {
            message: "'bad' is not a valid hunk header. \
                      Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'".to_string(),
            line_number: 234
        })
    );
    // Other edge cases are already covered by tests above/below.
}
|
||||
|
||||
// Unit tests for parse_update_file_chunk: missing/empty context markers,
// invalid diff lines, a full mixed chunk, and end-of-file anchoring.
#[test]
fn test_update_file_chunk() {
    assert_eq!(
        parse_update_file_chunk(&["bad"], 123, false),
        Err(InvalidHunkError {
            message: "Expected update hunk to start with a @@ context marker, got: 'bad'"
                .to_string(),
            line_number: 123
        })
    );
    assert_eq!(
        parse_update_file_chunk(&["@@"], 123, false),
        Err(InvalidHunkError {
            message: "Update hunk does not contain any lines".to_string(),
            line_number: 124
        })
    );
    assert_eq!(
        parse_update_file_chunk(&["@@", "bad"], 123, false),
        Err(InvalidHunkError {
            message: "Unexpected line found in update hunk: 'bad'. \
                      Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)".to_string(),
            line_number: 124
        })
    );
    // An immediate EOF marker means the chunk has no diff lines at all.
    assert_eq!(
        parse_update_file_chunk(&["@@", "*** End of File"], 123, false),
        Err(InvalidHunkError {
            message: "Update hunk does not contain any lines".to_string(),
            line_number: 124
        })
    );
    assert_eq!(
        parse_update_file_chunk(
            &[
                "@@ change_context",
                "",
                " context",
                "-remove",
                "+add",
                " context2",
                "*** End Patch",
            ],
            123,
            false
        ),
        Ok((
            (UpdateFileChunk {
                change_context: Some("change_context".to_string()),
                old_lines: vec![
                    "".to_string(),
                    "context".to_string(),
                    "remove".to_string(),
                    "context2".to_string()
                ],
                new_lines: vec![
                    "".to_string(),
                    "context".to_string(),
                    "add".to_string(),
                    "context2".to_string()
                ],
                is_end_of_file: false
            }),
            6
        ))
    );
    assert_eq!(
        parse_update_file_chunk(&["@@", "+line", "*** End of File"], 123, false),
        Ok((
            (UpdateFileChunk {
                change_context: None,
                old_lines: vec![],
                new_lines: vec!["line".to_string()],
                is_end_of_file: true
            }),
            3
        ))
    );
}
|
||||
151
llmx-rs/apply-patch/src/seek_sequence.rs
Normal file
151
llmx-rs/apply-patch/src/seek_sequence.rs
Normal file
@@ -0,0 +1,151 @@
|
||||
/// Attempt to find the sequence of `pattern` lines within `lines` beginning at or after `start`.
/// Returns the starting index of the match or `None` if not found. Matches are attempted with
/// decreasing strictness: exact match, then ignoring trailing whitespace, then ignoring leading
/// and trailing whitespace. When `eof` is true, we first try starting at the end-of-file (so that
/// patterns intended to match file endings are applied at the end), and fall back to searching
/// from `start` if needed.
///
/// Special cases handled defensively:
/// • Empty `pattern` → returns `Some(start)` (no-op match)
/// • `pattern.len() > lines.len()` → returns `None` (cannot match, avoids
///   out‑of‑bounds panic that occurred pre‑2025‑04‑12)
pub(crate) fn seek_sequence(
    lines: &[String],
    pattern: &[String],
    start: usize,
    eof: bool,
) -> Option<usize> {
    if pattern.is_empty() {
        return Some(start);
    }

    // When the pattern is longer than the available input there is no possible
    // match. Early‑return to avoid the out‑of‑bounds slice that would occur in
    // the search loops below (previously caused a panic when
    // `pattern.len() > lines.len()`).
    if pattern.len() > lines.len() {
        return None;
    }
    // The `lines.len() >= pattern.len()` guard is always true here (checked
    // just above), so `eof` alone decides the branch.
    // NOTE(review): when `eof` is set, `search_start` equals the loops' upper
    // bound, so each pass probes only the final position — the "fall back to
    // searching from `start`" promised in the doc comment does not happen in
    // this code. Confirm whether that is intended.
    let search_start = if eof && lines.len() >= pattern.len() {
        lines.len() - pattern.len()
    } else {
        start
    };
    // Exact match first.
    for i in search_start..=lines.len().saturating_sub(pattern.len()) {
        if lines[i..i + pattern.len()] == *pattern {
            return Some(i);
        }
    }
    // Then rstrip match.
    for i in search_start..=lines.len().saturating_sub(pattern.len()) {
        let mut ok = true;
        for (p_idx, pat) in pattern.iter().enumerate() {
            if lines[i + p_idx].trim_end() != pat.trim_end() {
                ok = false;
                break;
            }
        }
        if ok {
            return Some(i);
        }
    }
    // Finally, trim both sides to allow more lenience.
    for i in search_start..=lines.len().saturating_sub(pattern.len()) {
        let mut ok = true;
        for (p_idx, pat) in pattern.iter().enumerate() {
            if lines[i + p_idx].trim() != pat.trim() {
                ok = false;
                break;
            }
        }
        if ok {
            return Some(i);
        }
    }

    // ------------------------------------------------------------------
    // Final, most permissive pass – attempt to match after *normalising*
    // common Unicode punctuation to their ASCII equivalents so that diffs
    // authored with plain ASCII characters can still be applied to source
    // files that contain typographic dashes / quotes, etc. This mirrors the
    // fuzzy behaviour of `git apply` which ignores minor byte-level
    // differences when locating context lines.
    // ------------------------------------------------------------------

    // Trims, then maps common Unicode punctuation/spaces to ASCII look-alikes.
    fn normalise(s: &str) -> String {
        s.trim()
            .chars()
            .map(|c| match c {
                // Various dash / hyphen code-points → ASCII '-'
                '\u{2010}' | '\u{2011}' | '\u{2012}' | '\u{2013}' | '\u{2014}' | '\u{2015}'
                | '\u{2212}' => '-',
                // Fancy single quotes → '\''
                '\u{2018}' | '\u{2019}' | '\u{201A}' | '\u{201B}' => '\'',
                // Fancy double quotes → '"'
                '\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}' => '"',
                // Non-breaking space and other odd spaces → normal space
                '\u{00A0}' | '\u{2002}' | '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}'
                | '\u{2007}' | '\u{2008}' | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}'
                | '\u{3000}' => ' ',
                other => other,
            })
            .collect::<String>()
    }

    for i in search_start..=lines.len().saturating_sub(pattern.len()) {
        let mut ok = true;
        for (p_idx, pat) in pattern.iter().enumerate() {
            if normalise(&lines[i + p_idx]) != normalise(pat) {
                ok = false;
                break;
            }
        }
        if ok {
            return Some(i);
        }
    }

    None
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::seek_sequence;
    use std::string::ToString;

    // Convenience: build an owned Vec<String> from string literals.
    fn to_vec(strings: &[&str]) -> Vec<String> {
        strings.iter().map(ToString::to_string).collect()
    }

    #[test]
    fn test_exact_match_finds_sequence() {
        let lines = to_vec(&["foo", "bar", "baz"]);
        let pattern = to_vec(&["bar", "baz"]);
        assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(1));
    }

    #[test]
    fn test_rstrip_match_ignores_trailing_whitespace() {
        let lines = to_vec(&["foo ", "bar\t\t"]);
        // Pattern omits trailing whitespace.
        let pattern = to_vec(&["foo", "bar"]);
        assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(0));
    }

    #[test]
    fn test_trim_match_ignores_leading_and_trailing_whitespace() {
        let lines = to_vec(&[" foo ", " bar\t"]);
        // Pattern omits any additional whitespace.
        let pattern = to_vec(&["foo", "bar"]);
        assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(0));
    }

    #[test]
    fn test_pattern_longer_than_input_returns_none() {
        let lines = to_vec(&["just one line"]);
        let pattern = to_vec(&["too", "many", "lines"]);
        // Should not panic – must return None when pattern cannot possibly fit.
        assert_eq!(seek_sequence(&lines, &pattern, 0, false), None);
    }
}
|
||||
59
llmx-rs/apply-patch/src/standalone_executable.rs
Normal file
59
llmx-rs/apply-patch/src/standalone_executable.rs
Normal file
@@ -0,0 +1,59 @@
|
||||
use std::io::Read;
|
||||
use std::io::Write;
|
||||
|
||||
pub fn main() -> ! {
|
||||
let exit_code = run_main();
|
||||
std::process::exit(exit_code);
|
||||
}
|
||||
|
||||
/// We would prefer to return `std::process::ExitCode`, but its `exit_process()`
/// method is still a nightly API and we want main() to return !.
///
/// Exit codes: 0 on success, 1 for patch/IO failures, 2 for usage errors
/// (empty stdin or extra arguments).
pub fn run_main() -> i32 {
    // Expect either one argument (the full apply_patch payload) or read it from stdin.
    let mut args = std::env::args_os();
    let _argv0 = args.next();

    let patch_arg = match args.next() {
        Some(arg) => match arg.into_string() {
            Ok(s) => s,
            Err(_) => {
                eprintln!("Error: apply_patch requires a UTF-8 PATCH argument.");
                return 1;
            }
        },
        None => {
            // No argument provided; attempt to read the patch from stdin.
            let mut buf = String::new();
            match std::io::stdin().read_to_string(&mut buf) {
                Ok(_) => {
                    if buf.is_empty() {
                        // NOTE(review): usage text mixes "apply_patch" and
                        // "apply-patch" — confirm the intended binary name.
                        eprintln!("Usage: apply_patch 'PATCH'\n echo 'PATCH' | apply-patch");
                        return 2;
                    }
                    buf
                }
                Err(err) => {
                    eprintln!("Error: Failed to read PATCH from stdin.\n{err}");
                    return 1;
                }
            }
        }
    };

    // Refuse extra args to avoid ambiguity.
    if args.next().is_some() {
        eprintln!("Error: apply_patch accepts exactly one argument.");
        return 2;
    }

    let mut stdout = std::io::stdout();
    let mut stderr = std::io::stderr();
    match crate::apply_patch(&patch_arg, &mut stdout, &mut stderr) {
        Ok(()) => {
            // Flush to ensure output ordering when used in pipelines.
            let _ = stdout.flush();
            0
        }
        Err(_) => 1,
    }
}
|
||||
Reference in New Issue
Block a user