codex-rs/apply-patch/src/seek_sequence.rs

/// Attempt to find the sequence of `pattern` lines within `lines`.
///
/// Returns the index in `lines` at which the match starts, or `None` if there
/// is no match. Candidate positions are scanned in ascending order from
/// `start`, in passes of decreasing strictness (a stricter pass over the whole
/// range always wins over a more lenient one):
///
///  1. exact match,
///  2. ignoring trailing whitespace,
///  3. ignoring leading and trailing whitespace,
///  4. ignoring leading/trailing whitespace and folding common Unicode
///     dashes, quotes and spaces to their ASCII equivalents.
///
/// When `eof` is true, the position where `pattern` would end exactly at the
/// end of `lines` is tried first (with all four passes, and regardless of
/// `start`), so that patterns intended to match file endings are applied at
/// the end. Only if that fails do we fall back to the regular search from
/// `start`.
///
/// Special cases handled defensively:
///  • Empty `pattern` → returns `Some(start)` (no-op match)
///  • `pattern.len() > lines.len()` → returns `None` (cannot match; returning
///    early also keeps the slicing below in bounds)
///  • `start` beyond the last position where `pattern` fits → the regular
///    search has no candidates and yields `None`
pub(crate) fn seek_sequence(
    lines: &[String],
    pattern: &[String],
    start: usize,
    eof: bool,
) -> Option<usize> {
    if pattern.is_empty() {
        return Some(start);
    }

    // Highest index at which `pattern` still fits inside `lines`. `checked_sub`
    // yields `None` when the pattern is longer than the input; in that case no
    // match is possible and `?` returns before any slice is taken.
    let last_candidate = lines.len().checked_sub(pattern.len())?;

    // End-of-file hunks are anchored to the tail of the file first.
    let tail_match = if eof {
        find_match(lines, pattern, last_candidate..=last_candidate)
    } else {
        None
    };

    // Documented fallback: if the tail did not match (or `eof` is false), run
    // the ordinary forward search beginning at `start`.
    tail_match.or_else(|| find_match(lines, pattern, start..=last_candidate))
}

/// Runs the four matching passes over `candidates` and returns the first
/// starting index that matches, preferring stricter passes.
///
/// Every index in `candidates` must satisfy `i + pattern.len() <= lines.len()`;
/// `seek_sequence` guarantees this by capping the range at
/// `lines.len() - pattern.len()`. An empty range simply yields `None`.
fn find_match(
    lines: &[String],
    pattern: &[String],
    candidates: std::ops::RangeInclusive<usize>,
) -> Option<usize> {
    /// True when every line of `window` equals the corresponding pattern line
    /// after both have been passed through `key`.
    fn window_matches(window: &[String], pattern: &[String], key: fn(&str) -> &str) -> bool {
        window
            .iter()
            .zip(pattern)
            .all(|(line, pat)| key(line) == key(pat))
    }

    let width = pattern.len();

    candidates
        .clone()
        // Pass 1: exact match.
        .find(|&i| lines[i..i + width] == *pattern)
        // Pass 2: ignore trailing whitespace.
        .or_else(|| {
            candidates
                .clone()
                .find(|&i| window_matches(&lines[i..i + width], pattern, str::trim_end))
        })
        // Pass 3: ignore leading and trailing whitespace.
        .or_else(|| {
            candidates
                .clone()
                .find(|&i| window_matches(&lines[i..i + width], pattern, str::trim))
        })
        // Pass 4: most permissive – compare after normalising common Unicode
        // punctuation so that patches authored with plain ASCII characters can
        // still be located in files containing typographic dashes / quotes.
        .or_else(|| {
            // The pattern does not change between candidates, so normalise it
            // once instead of once per comparison.
            let normalised_pattern: Vec<String> = pattern.iter().map(|p| normalise(p)).collect();
            candidates.clone().find(|&i| {
                lines[i..i + width]
                    .iter()
                    .zip(&normalised_pattern)
                    .all(|(line, pat)| normalise(line) == *pat)
            })
        })
}

/// Trims `s` and maps typographic dashes, quotes and unusual space characters
/// to their plain ASCII counterparts; all other characters are kept as-is.
fn normalise(s: &str) -> String {
    s.trim()
        .chars()
        .map(|c| match c {
            // Various dash / hyphen code-points → ASCII '-'
            '\u{2010}' | '\u{2011}' | '\u{2012}' | '\u{2013}' | '\u{2014}' | '\u{2015}'
            | '\u{2212}' => '-',
            // Fancy single quotes → '\''
            '\u{2018}' | '\u{2019}' | '\u{201A}' | '\u{201B}' => '\'',
            // Fancy double quotes → '"'
            '\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}' => '"',
            // Non-breaking space and other odd spaces → normal space
            '\u{00A0}' | '\u{2002}' | '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}'
            | '\u{2007}' | '\u{2008}' | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}'
            | '\u{3000}' => ' ',
            other => other,
        })
        .collect()
}

#[cfg(test)]
mod tests {
    use super::seek_sequence;

    /// Converts a slice of string literals into the owned `Vec<String>` form
    /// that `seek_sequence` takes for both the file lines and the pattern.
    fn owned_lines(strings: &[&str]) -> Vec<String> {
        strings.iter().copied().map(String::from).collect()
    }

    /// An exact occurrence in the middle of the input is reported at its index.
    #[test]
    fn test_exact_match_finds_sequence() {
        let haystack = owned_lines(&["foo", "bar", "baz"]);
        let needle = owned_lines(&["bar", "baz"]);

        let found = seek_sequence(&haystack, &needle, 0, false);

        assert_eq!(found, Some(1));
    }

    /// Trailing spaces / tabs in the input do not prevent a match.
    #[test]
    fn test_rstrip_match_ignores_trailing_whitespace() {
        let haystack = owned_lines(&["foo   ", "bar\t\t"]);
        // The needle carries no trailing whitespace at all.
        let needle = owned_lines(&["foo", "bar"]);

        let found = seek_sequence(&haystack, &needle, 0, false);

        assert_eq!(found, Some(0));
    }

    /// Whitespace on either side of the input lines does not prevent a match.
    #[test]
    fn test_trim_match_ignores_leading_and_trailing_whitespace() {
        let haystack = owned_lines(&["    foo   ", "   bar\t"]);
        // The needle carries no surrounding whitespace at all.
        let needle = owned_lines(&["foo", "bar"]);

        let found = seek_sequence(&haystack, &needle, 0, false);

        assert_eq!(found, Some(0));
    }

    /// A pattern with more lines than the input can never fit: the call must
    /// return `None` rather than panic.
    #[test]
    fn test_pattern_longer_than_input_returns_none() {
        let haystack = owned_lines(&["just one line"]);
        let needle = owned_lines(&["too", "many", "lines"]);

        let found = seek_sequence(&haystack, &needle, 0, false);

        assert_eq!(found, None);
    }
}
-												feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)

As stated in `codex-rs/README.md`:

Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.

To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:

- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.

Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
											
										
										
											2025-04-24 13:31:40 -07:00
+								/// Attempt to find the sequence of `pattern` lines within `lines` beginning at or after `start`.
 								/// Returns the starting index of the match or `None` if not found. Matches are attempted with
 								/// decreasing strictness: exact match, then ignoring trailing whitespace, then ignoring leading
 								/// and trailing whitespace. When `eof` is true, we first try starting at the end-of-file (so that
 								/// patterns intended to match file endings are applied at the end), and fall back to searching
 								/// from `start` if needed.
 								///
 								/// Special cases handled defensively:
 								///  • Empty `pattern` → returns `Some(start)` (no-op match)
 								///  • `pattern.len() > lines.len()` → returns `None` (cannot match, avoids
 								///    out‑of‑bounds panic that occurred pre‑2025‑04‑12)
 								pub(crate) fn seek_sequence(
 								    lines: &[String],
 								    pattern: &[String],
 								    start: usize,
 								    eof: bool,
 								) -> Option<usize> {
 								    if pattern.is_empty() {
 								        return Some(start);
 								    }
 								    // When the pattern is longer than the available input there is no possible
 								    // match. Early‑return to avoid the out‑of‑bounds slice that would occur in
 								    // the search loops below (previously caused a panic when
 								    // `pattern.len() > lines.len()`).
 								    if pattern.len() > lines.len() {
 								        return None;
 								    }
 								    let search_start = if eof && lines.len() >= pattern.len() {
 								        lines.len() - pattern.len()
 								    } else {
 								        start
 								    };
 								    // Exact match first.
 								    for i in search_start..=lines.len().saturating_sub(pattern.len()) {
 								        if lines[i..i + pattern.len()] == *pattern {
 								            return Some(i);
 								        }
 								    }
 								    // Then rstrip match.
 								    for i in search_start..=lines.len().saturating_sub(pattern.len()) {
 								        let mut ok = true;
 								        for (p_idx, pat) in pattern.iter().enumerate() {
 								            if lines[i + p_idx].trim_end() != pat.trim_end() {
 								                ok = false;
 								                break;
 								            }
 								        }
 								        if ok {
 								            return Some(i);
 								        }
 								    }
 								    // Finally, trim both sides to allow more lenience.
 								    for i in search_start..=lines.len().saturating_sub(pattern.len()) {
 								        let mut ok = true;
 								        for (p_idx, pat) in pattern.iter().enumerate() {
 								            if lines[i + p_idx].trim() != pat.trim() {
 								                ok = false;
 								                break;
 								            }
 								        }
 								        if ok {
 								            return Some(i);
 								        }
 								    }
-												fix: handling weird unicode characters in `apply_patch` (#674)

I ❤️ unicode
											
										
										
											2025-04-25 16:01:58 -07:00
 								    // ------------------------------------------------------------------
 								    // Final, most permissive pass – attempt to match after *normalising*
 								    // common Unicode punctuation to their ASCII equivalents so that diffs
 								    // authored with plain ASCII characters can still be applied to source
 								    // files that contain typographic dashes / quotes, etc.  This mirrors the
 								    // fuzzy behaviour of `git apply` which ignores minor byte-level
 								    // differences when locating context lines.
 								    // ------------------------------------------------------------------
 								    fn normalise(s: &str) -> String {
 								        s.trim()
 								            .chars()
 								            .map(|c| match c {
 								                // Various dash / hyphen code-points → ASCII '-'
 								                '\u{2010}' | '\u{2011}' | '\u{2012}' | '\u{2013}' | '\u{2014}' | '\u{2015}'
 								                | '\u{2212}' => '-',
 								                // Fancy single quotes → '\''
 								                '\u{2018}' | '\u{2019}' | '\u{201A}' | '\u{201B}' => '\'',
 								                // Fancy double quotes → '"'
 								                '\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}' => '"',
 								                // Non-breaking space and other odd spaces → normal space
 								                '\u{00A0}' | '\u{2002}' | '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}'
 								                | '\u{2007}' | '\u{2008}' | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}'
 								                | '\u{3000}' => ' ',
 								                other => other,
 								            })
 								            .collect::<String>()
 								    }
 								    for i in search_start..=lines.len().saturating_sub(pattern.len()) {
 								        let mut ok = true;
 								        for (p_idx, pat) in pattern.iter().enumerate() {
 								            if normalise(&lines[i + p_idx]) != normalise(pat) {
 								                ok = false;
 								                break;
 								            }
 								        }
 								        if ok {
 								            return Some(i);
 								        }
 								    }
-												feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)

As stated in `codex-rs/README.md`:

Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.

To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:

- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.

Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
											
										
										
											2025-04-24 13:31:40 -07:00
+								    None
 								}
 								#[cfg(test)]
 								mod tests {
 								    use super::seek_sequence;
-												chore: clippy on redundant closure (#4058)

Add redundant closure clippy rules and let Codex fix it by minimising
FQP
											
										
										
											2025-09-22 20:30:16 +01:00
+								    use std::string::ToString;
-												feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)

As stated in `codex-rs/README.md`:

Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.

To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:

- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.

Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
											
										
										
											2025-04-24 13:31:40 -07:00
 								    fn to_vec(strings: &[&str]) -> Vec<String> {
-												chore: clippy on redundant closure (#4058)

Add redundant closure clippy rules and let Codex fix it by minimising
FQP
											
										
										
											2025-09-22 20:30:16 +01:00
+								        strings.iter().map(ToString::to_string).collect()
-												feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)

As stated in `codex-rs/README.md`:

Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.

To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:

- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.

Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
											
										
										
											2025-04-24 13:31:40 -07:00
+								    }
 								    #[test]
 								    fn test_exact_match_finds_sequence() {
 								        let lines = to_vec(&["foo", "bar", "baz"]);
 								        let pattern = to_vec(&["bar", "baz"]);
 								        assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(1));
 								    }
 								    #[test]
 								    fn test_rstrip_match_ignores_trailing_whitespace() {
 								        let lines = to_vec(&["foo   ", "bar\t\t"]);
 								        // Pattern omits trailing whitespace.
 								        let pattern = to_vec(&["foo", "bar"]);
 								        assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(0));
 								    }
 								    #[test]
 								    fn test_trim_match_ignores_leading_and_trailing_whitespace() {
 								        let lines = to_vec(&["    foo   ", "   bar\t"]);
 								        // Pattern omits any additional whitespace.
 								        let pattern = to_vec(&["foo", "bar"]);
 								        assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(0));
 								    }
 								    #[test]
 								    fn test_pattern_longer_than_input_returns_none() {
 								        let lines = to_vec(&["just one line"]);
 								        let pattern = to_vec(&["too", "many", "lines"]);
 								        // Should not panic – must return None when pattern cannot possibly fit.
 								        assert_eq!(seek_sequence(&lines, &pattern, 0, false), None);
 								    }
 								}