//! This module is responsible for parsing & validating a patch into a list of "hunks". //! (It does not attempt to actually check that the patch can be applied to the filesystem.) //! //! The official Lark grammar for the apply-patch format is: //! //! start: begin_patch hunk+ end_patch //! begin_patch: "*** Begin Patch" LF //! end_patch: "*** End Patch" LF? //! //! hunk: add_hunk | delete_hunk | update_hunk //! add_hunk: "*** Add File: " filename LF add_line+ //! delete_hunk: "*** Delete File: " filename LF //! update_hunk: "*** Update File: " filename LF change_move? change? //! filename: /(.+)/ //! add_line: "+" /(.+)/ LF -> line //! //! change_move: "*** Move to: " filename LF //! change: (change_context | change_line)+ eof_line? //! change_context: ("@@" | "@@ " /(.+)/) LF //! change_line: ("+" | "-" | " ") /(.+)/ LF //! eof_line: "*** End of File" LF //! //! The parser below is a little more lenient than the explicit spec and allows for //! leading/trailing whitespace around patch markers. use std::path::PathBuf; use thiserror::Error; const BEGIN_PATCH_MARKER: &str = "*** Begin Patch"; const END_PATCH_MARKER: &str = "*** End Patch"; const ADD_FILE_MARKER: &str = "*** Add File: "; const DELETE_FILE_MARKER: &str = "*** Delete File: "; const UPDATE_FILE_MARKER: &str = "*** Update File: "; const MOVE_TO_MARKER: &str = "*** Move to: "; const EOF_MARKER: &str = "*** End of File"; const CHANGE_CONTEXT_MARKER: &str = "@@ "; const EMPTY_CHANGE_CONTEXT_MARKER: &str = "@@"; #[derive(Debug, PartialEq, Error)] pub enum ParseError { #[error("invalid patch: {0}")] InvalidPatchError(String), #[error("invalid hunk at line {line_number}, {message}")] InvalidHunkError { message: String, line_number: usize }, } use ParseError::*; #[derive(Debug, PartialEq)] #[allow(clippy::enum_variant_names)] pub enum Hunk { AddFile { path: PathBuf, contents: String, }, DeleteFile { path: PathBuf, }, UpdateFile { path: PathBuf, move_path: Option, /// Chunks should be in order, i.e. the `change_context` of one chunk /// should occur later in the file than the previous chunk. chunks: Vec, }, } use Hunk::*; #[derive(Debug, PartialEq)] pub struct UpdateFileChunk { /// A single line of context used to narrow down the position of the chunk /// (this is usually a class, method, or function definition.) pub change_context: Option, /// A contiguous block of lines that should be replaced with `new_lines`. /// `old_lines` must occur strictly after `change_context`. pub old_lines: Vec, pub new_lines: Vec, /// If set to true, `old_lines` must occur at the end of the source file. /// (Tolerance around trailing newlines should be encouraged.) pub is_end_of_file: bool, } pub fn parse_patch(patch: &str) -> Result, ParseError> { let lines: Vec<&str> = patch.trim().lines().collect(); if lines.is_empty() || lines[0] != BEGIN_PATCH_MARKER { return Err(InvalidPatchError(String::from( "The first line of the patch must be '*** Begin Patch'", ))); } let last_line_index = lines.len() - 1; if lines[last_line_index] != END_PATCH_MARKER { return Err(InvalidPatchError(String::from( "The last line of the patch must be '*** End Patch'", ))); } let mut hunks: Vec = Vec::new(); let mut remaining_lines = &lines[1..last_line_index]; let mut line_number = 2; while !remaining_lines.is_empty() { let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number)?; hunks.push(hunk); line_number += hunk_lines; remaining_lines = &remaining_lines[hunk_lines..] } Ok(hunks) } /// Attempts to parse a single hunk from the start of lines. /// Returns the parsed hunk and the number of lines parsed (or a ParseError). fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> { // Be tolerant of case mismatches and extra padding around marker strings. let first_line = lines[0].trim(); if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) { // Add File let mut contents = String::new(); let mut parsed_lines = 1; for add_line in &lines[1..] { if let Some(line_to_add) = add_line.strip_prefix('+') { contents.push_str(line_to_add); contents.push('\n'); parsed_lines += 1; } else { break; } } return Ok(( AddFile { path: PathBuf::from(path), contents, }, parsed_lines, )); } else if let Some(path) = first_line.strip_prefix(DELETE_FILE_MARKER) { // Delete File return Ok(( DeleteFile { path: PathBuf::from(path), }, 1, )); } else if let Some(path) = first_line.strip_prefix(UPDATE_FILE_MARKER) { // Update File let mut remaining_lines = &lines[1..]; let mut parsed_lines = 1; // Optional: move file line let move_path = remaining_lines .first() .and_then(|x| x.strip_prefix(MOVE_TO_MARKER)); if move_path.is_some() { remaining_lines = &remaining_lines[1..]; parsed_lines += 1; } let mut chunks = Vec::new(); // NOTE: we need to know to stop once we reach the next special marker header. while !remaining_lines.is_empty() { // Skip over any completely blank lines that may separate chunks. if remaining_lines[0].trim().is_empty() { parsed_lines += 1; remaining_lines = &remaining_lines[1..]; continue; } if remaining_lines[0].starts_with("***") { break; } let (chunk, chunk_lines) = parse_update_file_chunk( remaining_lines, line_number + parsed_lines, chunks.is_empty(), )?; chunks.push(chunk); parsed_lines += chunk_lines; remaining_lines = &remaining_lines[chunk_lines..] } if chunks.is_empty() { return Err(InvalidHunkError { message: format!("Update file hunk for path '{path}' is empty"), line_number, }); } return Ok(( UpdateFile { path: PathBuf::from(path), move_path: move_path.map(PathBuf::from), chunks, }, parsed_lines, )); } Err(InvalidHunkError { message: format!( "'{first_line}' is not a valid hunk header. Valid hunk headers: '*** Add File: {{path}}', '*** Delete File: {{path}}', '*** Update File: {{path}}'" ), line_number, }) } fn parse_update_file_chunk( lines: &[&str], line_number: usize, allow_missing_context: bool, ) -> Result<(UpdateFileChunk, usize), ParseError> { if lines.is_empty() { return Err(InvalidHunkError { message: "Update hunk does not contain any lines".to_string(), line_number, }); } // If we see an explicit context marker @@ or @@ , consume it; otherwise, optionally // allow treating the chunk as starting directly with diff lines. let (change_context, start_index) = if lines[0] == EMPTY_CHANGE_CONTEXT_MARKER { (None, 1) } else if let Some(context) = lines[0].strip_prefix(CHANGE_CONTEXT_MARKER) { (Some(context.to_string()), 1) } else { if !allow_missing_context { return Err(InvalidHunkError { message: format!( "Expected update hunk to start with a @@ context marker, got: '{}'", lines[0] ), line_number, }); } (None, 0) }; if start_index >= lines.len() { return Err(InvalidHunkError { message: "Update hunk does not contain any lines".to_string(), line_number: line_number + 1, }); } let mut chunk = UpdateFileChunk { change_context, old_lines: Vec::new(), new_lines: Vec::new(), is_end_of_file: false, }; let mut parsed_lines = 0; for line in &lines[start_index..] { match *line { EOF_MARKER => { if parsed_lines == 0 { return Err(InvalidHunkError { message: "Update hunk does not contain any lines".to_string(), line_number: line_number + 1, }); } chunk.is_end_of_file = true; parsed_lines += 1; break; } line_contents => { match line_contents.chars().next() { None => { // Interpret this as an empty line. chunk.old_lines.push(String::new()); chunk.new_lines.push(String::new()); } Some(' ') => { chunk.old_lines.push(line_contents[1..].to_string()); chunk.new_lines.push(line_contents[1..].to_string()); } Some('+') => { chunk.new_lines.push(line_contents[1..].to_string()); } Some('-') => { chunk.old_lines.push(line_contents[1..].to_string()); } _ => { if parsed_lines == 0 { return Err(InvalidHunkError { message: format!( "Unexpected line found in update hunk: '{line_contents}'. Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)" ), line_number: line_number + 1, }); } // Assume this is the start of the next hunk. break; } } parsed_lines += 1; } } } Ok((chunk, parsed_lines + start_index)) } #[test] fn test_parse_patch() { assert_eq!( parse_patch("bad"), Err(InvalidPatchError( "The first line of the patch must be '*** Begin Patch'".to_string() )) ); assert_eq!( parse_patch("*** Begin Patch\nbad"), Err(InvalidPatchError( "The last line of the patch must be '*** End Patch'".to_string() )) ); assert_eq!( parse_patch( "*** Begin Patch\n\ *** Update File: test.py\n\ *** End Patch" ), Err(InvalidHunkError { message: "Update file hunk for path 'test.py' is empty".to_string(), line_number: 2, }) ); assert_eq!( parse_patch( "*** Begin Patch\n\ *** End Patch" ), Ok(Vec::new()) ); assert_eq!( parse_patch( "*** Begin Patch\n\ *** Add File: path/add.py\n\ +abc\n\ +def\n\ *** Delete File: path/delete.py\n\ *** Update File: path/update.py\n\ *** Move to: path/update2.py\n\ @@ def f():\n\ - pass\n\ + return 123\n\ *** End Patch" ), Ok(vec![ AddFile { path: PathBuf::from("path/add.py"), contents: "abc\ndef\n".to_string() }, DeleteFile { path: PathBuf::from("path/delete.py") }, UpdateFile { path: PathBuf::from("path/update.py"), move_path: Some(PathBuf::from("path/update2.py")), chunks: vec![UpdateFileChunk { change_context: Some("def f():".to_string()), old_lines: vec![" pass".to_string()], new_lines: vec![" return 123".to_string()], is_end_of_file: false }] } ]) ); // Update hunk followed by another hunk (Add File). assert_eq!( parse_patch( "*** Begin Patch\n\ *** Update File: file.py\n\ @@\n\ +line\n\ *** Add File: other.py\n\ +content\n\ *** End Patch" ), Ok(vec![ UpdateFile { path: PathBuf::from("file.py"), move_path: None, chunks: vec![UpdateFileChunk { change_context: None, old_lines: vec![], new_lines: vec!["line".to_string()], is_end_of_file: false }], }, AddFile { path: PathBuf::from("other.py"), contents: "content\n".to_string() } ]) ); // Update hunk without an explicit @@ header for the first chunk should parse. // Use a raw string to preserve the leading space diff marker on the context line. assert_eq!( parse_patch( r#"*** Begin Patch *** Update File: file2.py import foo +bar *** End Patch"#, ), Ok(vec![UpdateFile { path: PathBuf::from("file2.py"), move_path: None, chunks: vec![UpdateFileChunk { change_context: None, old_lines: vec!["import foo".to_string()], new_lines: vec!["import foo".to_string(), "bar".to_string()], is_end_of_file: false, }], }]) ); } #[test] fn test_parse_one_hunk() { assert_eq!( parse_one_hunk(&["bad"], 234), Err(InvalidHunkError { message: "'bad' is not a valid hunk header. \ Valid hunk headers: '*** Add File: {path}', '*** Delete File: {path}', '*** Update File: {path}'".to_string(), line_number: 234 }) ); // Other edge cases are already covered by tests above/below. } #[test] fn test_update_file_chunk() { assert_eq!( parse_update_file_chunk(&["bad"], 123, false), Err(InvalidHunkError { message: "Expected update hunk to start with a @@ context marker, got: 'bad'" .to_string(), line_number: 123 }) ); assert_eq!( parse_update_file_chunk(&["@@"], 123, false), Err(InvalidHunkError { message: "Update hunk does not contain any lines".to_string(), line_number: 124 }) ); assert_eq!( parse_update_file_chunk(&["@@", "bad"], 123, false), Err(InvalidHunkError { message: "Unexpected line found in update hunk: 'bad'. \ Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)".to_string(), line_number: 124 }) ); assert_eq!( parse_update_file_chunk(&["@@", "*** End of File"], 123, false), Err(InvalidHunkError { message: "Update hunk does not contain any lines".to_string(), line_number: 124 }) ); assert_eq!( parse_update_file_chunk( &[ "@@ change_context", "", " context", "-remove", "+add", " context2", "*** End Patch", ], 123, false ), Ok(( (UpdateFileChunk { change_context: Some("change_context".to_string()), old_lines: vec![ "".to_string(), "context".to_string(), "remove".to_string(), "context2".to_string() ], new_lines: vec![ "".to_string(), "context".to_string(), "add".to_string(), "context2".to_string() ], is_end_of_file: false }), 6 )) ); assert_eq!( parse_update_file_chunk(&["@@", "+line", "*** End of File"], 123, false), Ok(( (UpdateFileChunk { change_context: None, old_lines: vec![], new_lines: vec!["line".to_string()], is_end_of_file: true }), 3 )) ); }