Some effects of this change: - New formatting changes across many files. No functionality changes should occur from that. - Calls to `std::env::set_var` are considered unsafe; since this only happens in tests, we wrap them in `unsafe` blocks.
510 lines
17 KiB
Rust
510 lines
17 KiB
Rust
//! This module is responsible for parsing & validating a patch into a list of "hunks".
|
|
//! (It does not attempt to actually check that the patch can be applied to the filesystem.)
|
|
//!
|
|
//! The official Lark grammar for the apply-patch format is:
|
|
//!
|
|
//! start: begin_patch hunk+ end_patch
|
|
//! begin_patch: "*** Begin Patch" LF
|
|
//! end_patch: "*** End Patch" LF?
|
|
//!
|
|
//! hunk: add_hunk | delete_hunk | update_hunk
|
|
//! add_hunk: "*** Add File: " filename LF add_line+
|
|
//! delete_hunk: "*** Delete File: " filename LF
|
|
//! update_hunk: "*** Update File: " filename LF change_move? change?
|
|
//! filename: /(.+)/
|
|
//! add_line: "+" /(.+)/ LF -> line
|
|
//!
|
|
//! change_move: "*** Move to: " filename LF
|
|
//! change: (change_context | change_line)+ eof_line?
|
|
//! change_context: ("@@" | "@@ " /(.+)/) LF
|
|
//! change_line: ("+" | "-" | " ") /(.+)/ LF
|
|
//! eof_line: "*** End of File" LF
|
|
//!
|
|
//! The parser below is a little more lenient than the explicit spec and allows for
|
|
//! leading/trailing whitespace around patch markers.
|
|
use std::path::PathBuf;
|
|
|
|
use thiserror::Error;
|
|
|
|
// Literal marker strings recognised by the parser. Markers that end in a
// space are used as prefixes: the rest of the line is the marker's argument.

/// Required first line of a patch.
const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
/// Required last line of a patch.
const END_PATCH_MARKER: &str = "*** End Patch";
/// Prefix of an "add file" hunk header; the path follows.
const ADD_FILE_MARKER: &str = "*** Add File: ";
/// Prefix of a "delete file" hunk header; the path follows.
const DELETE_FILE_MARKER: &str = "*** Delete File: ";
/// Prefix of an "update file" hunk header; the path follows.
const UPDATE_FILE_MARKER: &str = "*** Update File: ";
/// Prefix of the optional rename line inside an update hunk; the new path follows.
const MOVE_TO_MARKER: &str = "*** Move to: ";
/// Line that flags an update chunk as ending at the end of the file.
const EOF_MARKER: &str = "*** End of File";
/// Prefix of a chunk header that carries a context string.
const CHANGE_CONTEXT_MARKER: &str = "@@ ";
/// Chunk header that carries no context string.
const EMPTY_CHANGE_CONTEXT_MARKER: &str = "@@";
|
|
|
|
#[derive(Debug, PartialEq, Error)]
|
|
pub enum ParseError {
|
|
#[error("invalid patch: {0}")]
|
|
InvalidPatchError(String),
|
|
#[error("invalid hunk at line {line_number}, {message}")]
|
|
InvalidHunkError { message: String, line_number: usize },
|
|
}
|
|
use ParseError::*;
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
#[allow(clippy::enum_variant_names)]
|
|
pub enum Hunk {
|
|
AddFile {
|
|
path: PathBuf,
|
|
contents: String,
|
|
},
|
|
DeleteFile {
|
|
path: PathBuf,
|
|
},
|
|
UpdateFile {
|
|
path: PathBuf,
|
|
move_path: Option<PathBuf>,
|
|
|
|
/// Chunks should be in order, i.e. the `change_context` of one chunk
|
|
/// should occur later in the file than the previous chunk.
|
|
chunks: Vec<UpdateFileChunk>,
|
|
},
|
|
}
|
|
use Hunk::*;
|
|
|
|
/// One contiguous replacement inside an `UpdateFile` hunk.
#[derive(Debug, PartialEq)]
pub struct UpdateFileChunk {
    /// A single line of context used to narrow down the position of the chunk
    /// (this is usually a class, method, or function definition.)
    pub change_context: Option<String>,

    /// A contiguous block of lines that should be replaced with `new_lines`.
    /// `old_lines` must occur strictly after `change_context`.
    pub old_lines: Vec<String>,

    /// The lines that take the place of `old_lines`.
    pub new_lines: Vec<String>,

    /// If set to true, `old_lines` must occur at the end of the source file.
    /// (Tolerance around trailing newlines should be encouraged.)
    pub is_end_of_file: bool,
}
|
|
|
|
pub fn parse_patch(patch: &str) -> Result<Vec<Hunk>, ParseError> {
|
|
let lines: Vec<&str> = patch.trim().lines().collect();
|
|
if lines.is_empty() || lines[0] != BEGIN_PATCH_MARKER {
|
|
return Err(InvalidPatchError(String::from(
|
|
"The first line of the patch must be '*** Begin Patch'",
|
|
)));
|
|
}
|
|
let last_line_index = lines.len() - 1;
|
|
if lines[last_line_index] != END_PATCH_MARKER {
|
|
return Err(InvalidPatchError(String::from(
|
|
"The last line of the patch must be '*** End Patch'",
|
|
)));
|
|
}
|
|
let mut hunks: Vec<Hunk> = Vec::new();
|
|
let mut remaining_lines = &lines[1..last_line_index];
|
|
let mut line_number = 2;
|
|
while !remaining_lines.is_empty() {
|
|
let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number)?;
|
|
hunks.push(hunk);
|
|
line_number += hunk_lines;
|
|
remaining_lines = &remaining_lines[hunk_lines..]
|
|
}
|
|
Ok(hunks)
|
|
}
|
|
|
|
/// Attempts to parse a single hunk from the start of lines.
|
|
/// Returns the parsed hunk and the number of lines parsed (or a ParseError).
|
|
fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
|
|
// Be tolerant of case mismatches and extra padding around marker strings.
|
|
let first_line = lines[0].trim();
|
|
if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {
|
|
// Add File
|
|
let mut contents = String::new();
|
|
let mut parsed_lines = 1;
|
|
for add_line in &lines[1..] {
|
|
if let Some(line_to_add) = add_line.strip_prefix('+') {
|
|
contents.push_str(line_to_add);
|
|
contents.push('\n');
|
|
parsed_lines += 1;
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
return Ok((
|
|
AddFile {
|
|
path: PathBuf::from(path),
|
|
contents,
|
|
},
|
|
parsed_lines,
|
|
));
|
|
} else if let Some(path) = first_line.strip_prefix(DELETE_FILE_MARKER) {
|
|
// Delete File
|
|
return Ok((
|
|
DeleteFile {
|
|
path: PathBuf::from(path),
|
|
},
|
|
1,
|
|
));
|
|
} else if let Some(path) = first_line.strip_prefix(UPDATE_FILE_MARKER) {
|
|
// Update File
|
|
let mut remaining_lines = &lines[1..];
|
|
let mut parsed_lines = 1;
|
|
|
|
// Optional: move file line
|
|
let move_path = remaining_lines
|
|
.first()
|
|
.and_then(|x| x.strip_prefix(MOVE_TO_MARKER));
|
|
|
|
if move_path.is_some() {
|
|
remaining_lines = &remaining_lines[1..];
|
|
parsed_lines += 1;
|
|
}
|
|
|
|
let mut chunks = Vec::new();
|
|
// NOTE: we need to know to stop once we reach the next special marker header.
|
|
while !remaining_lines.is_empty() {
|
|
// Skip over any completely blank lines that may separate chunks.
|
|
if remaining_lines[0].trim().is_empty() {
|
|
parsed_lines += 1;
|
|
remaining_lines = &remaining_lines[1..];
|
|
continue;
|
|
}
|
|
|
|
if remaining_lines[0].starts_with("***") {
|
|
break;
|
|
}
|
|
|
|
let (chunk, chunk_lines) = parse_update_file_chunk(
|
|
remaining_lines,
|
|
line_number + parsed_lines,
|
|
chunks.is_empty(),
|
|
)?;
|
|
chunks.push(chunk);
|
|
parsed_lines += chunk_lines;
|
|
remaining_lines = &remaining_lines[chunk_lines..]
|
|
}
|
|
|
|
if chunks.is_empty() {
|
|
return Err(InvalidHunkError {
|
|
message: format!("Update file hunk for path '{path}' is empty"),
|
|
line_number,
|
|
});
|
|
}
|
|
|
|
return Ok((
|
|
UpdateFile {
|
|
path: PathBuf::from(path),
|
|
move_path: move_path.map(PathBuf::from),
|
|
chunks,
|
|
},
|
|
parsed_lines,
|
|
));
|
|
}
|
|
|
|
Err(InvalidHunkError {
|
|
message: format!(
|
|
"'{first_line}' is not a valid hunk header. Valid hunk headers: '*** Add File: {{path}}', '*** Delete File: {{path}}', '*** Update File: {{path}}'"
|
|
),
|
|
line_number,
|
|
})
|
|
}
|
|
|
|
fn parse_update_file_chunk(
|
|
lines: &[&str],
|
|
line_number: usize,
|
|
allow_missing_context: bool,
|
|
) -> Result<(UpdateFileChunk, usize), ParseError> {
|
|
if lines.is_empty() {
|
|
return Err(InvalidHunkError {
|
|
message: "Update hunk does not contain any lines".to_string(),
|
|
line_number,
|
|
});
|
|
}
|
|
// If we see an explicit context marker @@ or @@ <context>, consume it; otherwise, optionally
|
|
// allow treating the chunk as starting directly with diff lines.
|
|
let (change_context, start_index) = if lines[0] == EMPTY_CHANGE_CONTEXT_MARKER {
|
|
(None, 1)
|
|
} else if let Some(context) = lines[0].strip_prefix(CHANGE_CONTEXT_MARKER) {
|
|
(Some(context.to_string()), 1)
|
|
} else {
|
|
if !allow_missing_context {
|
|
return Err(InvalidHunkError {
|
|
message: format!(
|
|
"Expected update hunk to start with a @@ context marker, got: '{}'",
|
|
lines[0]
|
|
),
|
|
line_number,
|
|
});
|
|
}
|
|
(None, 0)
|
|
};
|
|
if start_index >= lines.len() {
|
|
return Err(InvalidHunkError {
|
|
message: "Update hunk does not contain any lines".to_string(),
|
|
line_number: line_number + 1,
|
|
});
|
|
}
|
|
let mut chunk = UpdateFileChunk {
|
|
change_context,
|
|
old_lines: Vec::new(),
|
|
new_lines: Vec::new(),
|
|
is_end_of_file: false,
|
|
};
|
|
let mut parsed_lines = 0;
|
|
for line in &lines[start_index..] {
|
|
match *line {
|
|
EOF_MARKER => {
|
|
if parsed_lines == 0 {
|
|
return Err(InvalidHunkError {
|
|
message: "Update hunk does not contain any lines".to_string(),
|
|
line_number: line_number + 1,
|
|
});
|
|
}
|
|
chunk.is_end_of_file = true;
|
|
parsed_lines += 1;
|
|
break;
|
|
}
|
|
line_contents => {
|
|
match line_contents.chars().next() {
|
|
None => {
|
|
// Interpret this as an empty line.
|
|
chunk.old_lines.push(String::new());
|
|
chunk.new_lines.push(String::new());
|
|
}
|
|
Some(' ') => {
|
|
chunk.old_lines.push(line_contents[1..].to_string());
|
|
chunk.new_lines.push(line_contents[1..].to_string());
|
|
}
|
|
Some('+') => {
|
|
chunk.new_lines.push(line_contents[1..].to_string());
|
|
}
|
|
Some('-') => {
|
|
chunk.old_lines.push(line_contents[1..].to_string());
|
|
}
|
|
_ => {
|
|
if parsed_lines == 0 {
|
|
return Err(InvalidHunkError {
|
|
message: format!(
|
|
"Unexpected line found in update hunk: '{line_contents}'. Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)"
|
|
),
|
|
line_number: line_number + 1,
|
|
});
|
|
}
|
|
// Assume this is the start of the next hunk.
|
|
break;
|
|
}
|
|
}
|
|
parsed_lines += 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok((chunk, parsed_lines + start_index))
|
|
}
|
|
|
|
/// End-to-end checks for `parse_patch`: marker validation, empty patches,
/// and patches that combine every hunk kind.
#[test]
fn test_parse_patch() {
    // The begin marker is mandatory.
    assert_eq!(
        parse_patch("bad"),
        Err(InvalidPatchError(
            "The first line of the patch must be '*** Begin Patch'".to_string()
        ))
    );

    // The end marker is mandatory.
    assert_eq!(
        parse_patch("*** Begin Patch\nbad"),
        Err(InvalidPatchError(
            "The last line of the patch must be '*** End Patch'".to_string()
        ))
    );

    // An update hunk without any chunk is rejected at the hunk's header line.
    assert_eq!(
        parse_patch(
            "*** Begin Patch\n\
             *** Update File: test.py\n\
             *** End Patch"
        ),
        Err(InvalidHunkError {
            message: "Update file hunk for path 'test.py' is empty".to_string(),
            line_number: 2,
        })
    );

    // A patch with no hunks parses to an empty list.
    assert_eq!(
        parse_patch(
            "*** Begin Patch\n\
             *** End Patch"
        ),
        Ok(Vec::new())
    );

    // One hunk of each kind, including a rename on the update hunk.
    assert_eq!(
        parse_patch(
            "*** Begin Patch\n\
             *** Add File: path/add.py\n\
             +abc\n\
             +def\n\
             *** Delete File: path/delete.py\n\
             *** Update File: path/update.py\n\
             *** Move to: path/update2.py\n\
             @@ def f():\n\
             - pass\n\
             + return 123\n\
             *** End Patch"
        ),
        Ok(vec![
            AddFile {
                path: PathBuf::from("path/add.py"),
                contents: "abc\ndef\n".to_string()
            },
            DeleteFile {
                path: PathBuf::from("path/delete.py")
            },
            UpdateFile {
                path: PathBuf::from("path/update.py"),
                move_path: Some(PathBuf::from("path/update2.py")),
                chunks: vec![UpdateFileChunk {
                    change_context: Some("def f():".to_string()),
                    old_lines: vec![" pass".to_string()],
                    new_lines: vec![" return 123".to_string()],
                    is_end_of_file: false
                }]
            }
        ])
    );

    // Update hunk followed by another hunk (Add File).
    assert_eq!(
        parse_patch(
            "*** Begin Patch\n\
             *** Update File: file.py\n\
             @@\n\
             +line\n\
             *** Add File: other.py\n\
             +content\n\
             *** End Patch"
        ),
        Ok(vec![
            UpdateFile {
                path: PathBuf::from("file.py"),
                move_path: None,
                chunks: vec![UpdateFileChunk {
                    change_context: None,
                    old_lines: vec![],
                    new_lines: vec!["line".to_string()],
                    is_end_of_file: false
                }],
            },
            AddFile {
                path: PathBuf::from("other.py"),
                contents: "content\n".to_string()
            }
        ])
    );

    // Update hunk without an explicit @@ header for the first chunk should parse.
    // The context line must keep its leading-space diff marker, so the patch is
    // written on one line with explicit `\n` escapes: a `\`-continued literal
    // would strip the whitespace at the start of each continued line.
    assert_eq!(
        parse_patch(
            "*** Begin Patch\n*** Update File: file2.py\n import foo\n+bar\n*** End Patch",
        ),
        Ok(vec![UpdateFile {
            path: PathBuf::from("file2.py"),
            move_path: None,
            chunks: vec![UpdateFileChunk {
                change_context: None,
                old_lines: vec!["import foo".to_string()],
                new_lines: vec!["import foo".to_string(), "bar".to_string()],
                is_end_of_file: false,
            }],
        }])
    );
}
|
|
|
|
/// A line that is not one of the three hunk headers is rejected, and the
/// error carries the line number that was passed in.
#[test]
fn test_parse_one_hunk() {
    let expected = InvalidHunkError {
        message: "'bad' is not a valid hunk header. \
                  Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'"
            .to_string(),
        line_number: 234,
    };
    assert_eq!(parse_one_hunk(&["bad"], 234), Err(expected));
    // Other edge cases are already covered by tests above/below.
}
|
|
|
|
/// Covers the error paths and two successful parses of
/// `parse_update_file_chunk`, always with a mandatory `@@` header.
#[test]
fn test_update_file_chunk() {
    // Builds the error result that the malformed-chunk cases expect.
    let hunk_error = |message: &str, line_number: usize| {
        Err::<(UpdateFileChunk, usize), _>(InvalidHunkError {
            message: message.to_string(),
            line_number,
        })
    };

    // A missing header is reported at the chunk's first line.
    assert_eq!(
        parse_update_file_chunk(&["bad"], 123, false),
        hunk_error(
            "Expected update hunk to start with a @@ context marker, got: 'bad'",
            123
        )
    );

    // A header with nothing after it.
    assert_eq!(
        parse_update_file_chunk(&["@@"], 123, false),
        hunk_error("Update hunk does not contain any lines", 124)
    );

    // The first line after the header is not a diff line.
    assert_eq!(
        parse_update_file_chunk(&["@@", "bad"], 123, false),
        hunk_error(
            "Unexpected line found in update hunk: 'bad'. \
             Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)",
            124
        )
    );

    // An end-of-file marker directly after the header.
    assert_eq!(
        parse_update_file_chunk(&["@@", "*** End of File"], 123, false),
        hunk_error("Update hunk does not contain any lines", 124)
    );

    // A full chunk: the trailing `*** End Patch` line ends it without being consumed.
    let patch_lines = [
        "@@ change_context",
        "",
        " context",
        "-remove",
        "+add",
        " context2",
        "*** End Patch",
    ];
    let expected_chunk = UpdateFileChunk {
        change_context: Some("change_context".to_string()),
        old_lines: vec![
            "".to_string(),
            "context".to_string(),
            "remove".to_string(),
            "context2".to_string(),
        ],
        new_lines: vec![
            "".to_string(),
            "context".to_string(),
            "add".to_string(),
            "context2".to_string(),
        ],
        is_end_of_file: false,
    };
    assert_eq!(
        parse_update_file_chunk(&patch_lines, 123, false),
        Ok((expected_chunk, 6))
    );

    // The end-of-file marker is consumed and recorded on the chunk.
    let expected_eof_chunk = UpdateFileChunk {
        change_context: None,
        old_lines: vec![],
        new_lines: vec!["line".to_string()],
        is_end_of_file: true,
    };
    assert_eq!(
        parse_update_file_chunk(&["@@", "+line", "*** End of File"], 123, false),
        Ok((expected_eof_chunk, 3))
    );
}
|