/// Attempt to find the sequence of `pattern` lines within `lines` beginning at or after `start`.
///
/// Returns the starting index of the match, or `None` if no match exists.
///
/// Matches are attempted with decreasing strictness. Each pass scans every
/// candidate position before the next, more lenient pass is tried:
///
/// 1. exact match,
/// 2. ignoring trailing whitespace,
/// 3. ignoring leading and trailing whitespace,
/// 4. ignoring leading/trailing whitespace *and* treating common Unicode
///    punctuation (typographic dashes, curly quotes, non-breaking and other
///    unusual spaces) as their ASCII equivalents, so that patches authored in
///    plain ASCII still apply to files containing typographic characters.
///
/// When `eof` is true, the end-of-file position
/// (`lines.len() - pattern.len()`) is tried first with all four passes, so
/// that patterns intended to match the end of the file are applied there.
/// Only if that fails does the search fall back to scanning from `start`.
///
/// Special cases handled defensively:
/// * Empty `pattern` → returns `Some(start)` (no-op match).
/// * `pattern.len() > lines.len()` → returns `None` (cannot match; this guard
///   avoids the out-of-bounds slice panic that occurred pre-2025-04-12).
/// * `start` beyond the last position where `pattern` could fit → `None`
///   (unless `eof` is true and the pattern matches at the end of the file).
pub(crate) fn seek_sequence(
    lines: &[String],
    pattern: &[String],
    start: usize,
    eof: bool,
) -> Option<usize> {
    if pattern.is_empty() {
        return Some(start);
    }

    // Last index at which `pattern` still fits inside `lines`. `checked_sub`
    // doubles as the "pattern longer than input" guard: it yields `None`
    // instead of underflowing, and `?` turns that into "no match".
    let last_start = lines.len().checked_sub(pattern.len())?;

    if eof {
        // Prefer the end-of-file position; the fallback skips `last_start`
        // because every pass has already rejected it.
        seek_in(lines, pattern, last_start..=last_start)
            .or_else(|| seek_in(lines, pattern, start..last_start))
    } else {
        seek_in(lines, pattern, start..=last_start)
    }
}

/// Line comparators ordered from strictest to most lenient.
const MATCH_PASSES: [fn(&str, &str) -> bool; 4] = [exact_eq, rstrip_eq, trim_eq, normalised_eq];

/// Runs every comparator in [`MATCH_PASSES`] over `candidates` (start indices
/// into `lines`) and returns the first index accepted by the strictest pass
/// that accepts any index at all.
///
/// Every index produced by `candidates` must satisfy
/// `index + pattern.len() <= lines.len()`.
fn seek_in<I>(lines: &[String], pattern: &[String], candidates: I) -> Option<usize>
where
    I: Iterator<Item = usize> + Clone,
{
    for &same_line in MATCH_PASSES.iter() {
        let hit = candidates.clone().find(|&i| {
            lines[i..i + pattern.len()]
                .iter()
                .zip(pattern)
                .all(|(line, pat)| same_line(line.as_str(), pat.as_str()))
        });
        if hit.is_some() {
            return hit;
        }
    }
    None
}

/// Byte-for-byte equality.
fn exact_eq(a: &str, b: &str) -> bool {
    a == b
}

/// Equality ignoring trailing whitespace.
fn rstrip_eq(a: &str, b: &str) -> bool {
    a.trim_end() == b.trim_end()
}

/// Equality ignoring leading and trailing whitespace.
fn trim_eq(a: &str, b: &str) -> bool {
    a.trim() == b.trim()
}

/// Equality after trimming both sides and folding common Unicode punctuation
/// to ASCII. Compares the two character streams lazily, so no intermediate
/// `String` is allocated.
fn normalised_eq(a: &str, b: &str) -> bool {
    a.trim()
        .chars()
        .map(normalise_char)
        .eq(b.trim().chars().map(normalise_char))
}

/// Maps typographic punctuation and unusual space characters to their plain
/// ASCII counterparts; every other character is returned unchanged.
fn normalise_char(c: char) -> char {
    match c {
        // Various dash / hyphen code-points → ASCII '-'
        '\u{2010}' | '\u{2011}' | '\u{2012}' | '\u{2013}' | '\u{2014}' | '\u{2015}'
        | '\u{2212}' => '-',
        // Fancy single quotes → '\''
        '\u{2018}' | '\u{2019}' | '\u{201A}' | '\u{201B}' => '\'',
        // Fancy double quotes → '"'
        '\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}' => '"',
        // Non-breaking space and other odd spaces → normal space
        '\u{00A0}' | '\u{2002}' | '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}'
        | '\u{2007}' | '\u{2008}' | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}'
        | '\u{3000}' => ' ',
        other => other,
    }
}

#[cfg(test)]
mod tests {
    use super::seek_sequence;
    use std::string::ToString;

    fn to_vec(strings: &[&str]) -> Vec<String> {
        strings.iter().map(ToString::to_string).collect()
    }

    #[test]
    fn test_exact_match_finds_sequence() {
        let lines = to_vec(&["foo", "bar", "baz"]);
        let pattern = to_vec(&["bar", "baz"]);
        assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(1));
    }

    #[test]
    fn test_rstrip_match_ignores_trailing_whitespace() {
        let lines = to_vec(&["foo ", "bar\t\t"]);
        // Pattern omits trailing whitespace.
        let pattern = to_vec(&["foo", "bar"]);
        assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(0));
    }

    #[test]
    fn test_trim_match_ignores_leading_and_trailing_whitespace() {
        let lines = to_vec(&[" foo ", " bar\t"]);
        // Pattern omits any additional whitespace.
        let pattern = to_vec(&["foo", "bar"]);
        assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(0));
    }

    #[test]
    fn test_pattern_longer_than_input_returns_none() {
        let lines = to_vec(&["just one line"]);
        let pattern = to_vec(&["too", "many", "lines"]);
        // Should not panic – must return None when pattern cannot possibly fit.
        assert_eq!(seek_sequence(&lines, &pattern, 0, false), None);
    }

    #[test]
    fn test_empty_pattern_returns_start() {
        let lines = to_vec(&["foo", "bar"]);
        assert_eq!(seek_sequence(&lines, &[], 1, false), Some(1));
    }

    #[test]
    fn test_eof_prefers_match_at_end_of_file() {
        let lines = to_vec(&["a", "b", "a", "b"]);
        let pattern = to_vec(&["a", "b"]);
        // Both index 0 and index 2 match; `eof` must pick the trailing one.
        assert_eq!(seek_sequence(&lines, &pattern, 0, true), Some(2));
    }

    #[test]
    fn test_eof_falls_back_to_search_from_start() {
        let lines = to_vec(&["foo", "bar", "baz"]);
        let pattern = to_vec(&["foo", "bar"]);
        // The pattern does not sit at the end of the file, so the search must
        // fall back to scanning from `start`.
        assert_eq!(seek_sequence(&lines, &pattern, 0, true), Some(0));
    }

    #[test]
    fn test_unicode_punctuation_is_normalised() {
        let lines = to_vec(&["let s = \u{201C}hi\u{201D} \u{2013} ok"]);
        let pattern = to_vec(&["let s = \"hi\" - ok"]);
        assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(0));
    }
}