Scrollable slash commands (#1830)
Scrollable slash commands. Part 1 of the multi PR.
This commit is contained in:
177
codex-rs/common/src/fuzzy_match.rs
Normal file
177
codex-rs/common/src/fuzzy_match.rs
Normal file
@@ -0,0 +1,177 @@
|
||||
/// Simple case-insensitive subsequence matcher used for fuzzy filtering.
|
||||
///
|
||||
/// Returns the indices (character positions) of the matched characters in the
|
||||
/// ORIGINAL `haystack` string and a score where smaller is better.
|
||||
///
|
||||
/// Unicode correctness: we perform the match on a lowercased copy of the
|
||||
/// haystack and needle but maintain a mapping from each character in the
|
||||
/// lowercased haystack back to the original character index in `haystack`.
|
||||
/// This ensures the returned indices can be safely used with
|
||||
/// `str::chars().enumerate()` consumers for highlighting, even when
|
||||
/// lowercasing expands certain characters (e.g., ß → ss, İ → i̇).
|
||||
pub fn fuzzy_match(haystack: &str, needle: &str) -> Option<(Vec<usize>, i32)> {
|
||||
if needle.is_empty() {
|
||||
return Some((Vec::new(), i32::MAX));
|
||||
}
|
||||
|
||||
let mut lowered_chars: Vec<char> = Vec::new();
|
||||
let mut lowered_to_orig_char_idx: Vec<usize> = Vec::new();
|
||||
for (orig_idx, ch) in haystack.chars().enumerate() {
|
||||
for lc in ch.to_lowercase() {
|
||||
lowered_chars.push(lc);
|
||||
lowered_to_orig_char_idx.push(orig_idx);
|
||||
}
|
||||
}
|
||||
|
||||
let lowered_needle: Vec<char> = needle.to_lowercase().chars().collect();
|
||||
|
||||
let mut result_orig_indices: Vec<usize> = Vec::with_capacity(lowered_needle.len());
|
||||
let mut last_lower_pos: Option<usize> = None;
|
||||
let mut cur = 0usize;
|
||||
for &nc in lowered_needle.iter() {
|
||||
let mut found_at: Option<usize> = None;
|
||||
while cur < lowered_chars.len() {
|
||||
if lowered_chars[cur] == nc {
|
||||
found_at = Some(cur);
|
||||
cur += 1;
|
||||
break;
|
||||
}
|
||||
cur += 1;
|
||||
}
|
||||
let pos = found_at?;
|
||||
result_orig_indices.push(lowered_to_orig_char_idx[pos]);
|
||||
last_lower_pos = Some(pos);
|
||||
}
|
||||
|
||||
let first_lower_pos = if result_orig_indices.is_empty() {
|
||||
0usize
|
||||
} else {
|
||||
let target_orig = result_orig_indices[0];
|
||||
lowered_to_orig_char_idx
|
||||
.iter()
|
||||
.position(|&oi| oi == target_orig)
|
||||
.unwrap_or(0)
|
||||
};
|
||||
// last defaults to first for single-hit; score = extra span between first/last hit
|
||||
// minus needle len (≥0).
|
||||
// Strongly reward prefix matches by subtracting 100 when the first hit is at index 0.
|
||||
let last_lower_pos = last_lower_pos.unwrap_or(first_lower_pos);
|
||||
let window =
|
||||
(last_lower_pos as i32 - first_lower_pos as i32 + 1) - (lowered_needle.len() as i32);
|
||||
let mut score = window.max(0);
|
||||
if first_lower_pos == 0 {
|
||||
score -= 100;
|
||||
}
|
||||
|
||||
result_orig_indices.sort_unstable();
|
||||
result_orig_indices.dedup();
|
||||
Some((result_orig_indices, score))
|
||||
}
|
||||
|
||||
/// Convenience wrapper to get only the indices for a fuzzy match.
|
||||
pub fn fuzzy_indices(haystack: &str, needle: &str) -> Option<Vec<usize>> {
|
||||
fuzzy_match(haystack, needle).map(|(mut idx, _)| {
|
||||
idx.sort_unstable();
|
||||
idx.dedup();
|
||||
idx
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn ascii_basic_indices() {
|
||||
let (idx, score) = match fuzzy_match("hello", "hl") {
|
||||
Some(v) => v,
|
||||
None => panic!("expected a match"),
|
||||
};
|
||||
assert_eq!(idx, vec![0, 2]);
|
||||
// 'h' at 0, 'l' at 2 -> window 1; start-of-string bonus applies (-100)
|
||||
assert_eq!(score, -99);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unicode_dotted_i_istanbul_highlighting() {
|
||||
let (idx, score) = match fuzzy_match("İstanbul", "is") {
|
||||
Some(v) => v,
|
||||
None => panic!("expected a match"),
|
||||
};
|
||||
assert_eq!(idx, vec![0, 1]);
|
||||
// Matches at lowered positions 0 and 2 -> window 1; start-of-string bonus applies
|
||||
assert_eq!(score, -99);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unicode_german_sharp_s_casefold() {
|
||||
assert!(fuzzy_match("straße", "strasse").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn prefer_contiguous_match_over_spread() {
|
||||
let (_idx_a, score_a) = match fuzzy_match("abc", "abc") {
|
||||
Some(v) => v,
|
||||
None => panic!("expected a match"),
|
||||
};
|
||||
let (_idx_b, score_b) = match fuzzy_match("a-b-c", "abc") {
|
||||
Some(v) => v,
|
||||
None => panic!("expected a match"),
|
||||
};
|
||||
// Contiguous window -> 0; start-of-string bonus -> -100
|
||||
assert_eq!(score_a, -100);
|
||||
// Spread over 5 chars for 3-letter needle -> window 2; with bonus -> -98
|
||||
assert_eq!(score_b, -98);
|
||||
assert!(score_a < score_b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn start_of_string_bonus_applies() {
|
||||
let (_idx_a, score_a) = match fuzzy_match("file_name", "file") {
|
||||
Some(v) => v,
|
||||
None => panic!("expected a match"),
|
||||
};
|
||||
let (_idx_b, score_b) = match fuzzy_match("my_file_name", "file") {
|
||||
Some(v) => v,
|
||||
None => panic!("expected a match"),
|
||||
};
|
||||
// Start-of-string contiguous -> window 0; bonus -> -100
|
||||
assert_eq!(score_a, -100);
|
||||
// Non-prefix contiguous -> window 0; no bonus -> 0
|
||||
assert_eq!(score_b, 0);
|
||||
assert!(score_a < score_b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_needle_matches_with_max_score_and_no_indices() {
|
||||
let (idx, score) = match fuzzy_match("anything", "") {
|
||||
Some(v) => v,
|
||||
None => panic!("empty needle should match"),
|
||||
};
|
||||
assert!(idx.is_empty());
|
||||
assert_eq!(score, i32::MAX);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn case_insensitive_matching_basic() {
|
||||
let (idx, score) = match fuzzy_match("FooBar", "foO") {
|
||||
Some(v) => v,
|
||||
None => panic!("expected a match"),
|
||||
};
|
||||
assert_eq!(idx, vec![0, 1, 2]);
|
||||
// Contiguous prefix match (case-insensitive) -> window 0 with bonus
|
||||
assert_eq!(score, -100);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn indices_are_deduped_for_multichar_lowercase_expansion() {
|
||||
let needle = "\u{0069}\u{0307}"; // "i" + combining dot above
|
||||
let (idx, score) = match fuzzy_match("İ", needle) {
|
||||
Some(v) => v,
|
||||
None => panic!("expected a match"),
|
||||
};
|
||||
assert_eq!(idx, vec![0]);
|
||||
// Lowercasing 'İ' expands to two chars; contiguous prefix -> window 0 with bonus
|
||||
assert_eq!(score, -100);
|
||||
}
|
||||
}
|
||||
@@ -27,3 +27,5 @@ pub use sandbox_summary::summarize_sandbox_policy;
|
||||
mod config_summary;
|
||||
|
||||
pub use config_summary::create_config_summary_entries;
|
||||
// Shared fuzzy matcher (used by TUI selection popups and other UI filtering)
|
||||
pub mod fuzzy_match;
|
||||
|
||||
Reference in New Issue
Block a user