From c99d1a0b36b9f2f54dc8521c28ac8eb3e1967794 Mon Sep 17 00:00:00 2001 From: Riccardo Mazzarini Date: Fri, 10 Nov 2023 14:58:24 +0100 Subject: [PATCH] fzf-v1: refactor forward and backward passes to use `find_first` and `find_last` --- src/algos/fzf/v1.rs | 135 ++++++++++++++++++++++++++++++++------------ src/algos/fzf/v2.rs | 14 ++--- 2 files changed, 105 insertions(+), 44 deletions(-) diff --git a/src/algos/fzf/v1.rs b/src/algos/fzf/v1.rs index 5bd75ab..d582b3b 100644 --- a/src/algos/fzf/v1.rs +++ b/src/algos/fzf/v1.rs @@ -10,6 +10,9 @@ pub struct FzfV1 { /// TODO: docs case_sensitivity: CaseSensitivity, + /// TODO: docs + normalization: bool, + /// TODO: docs scheme: Scheme, @@ -23,6 +26,7 @@ impl core::fmt::Debug for FzfV1 { f.debug_struct("FzfV1") .field("case_sensitivity", &self.case_sensitivity) .field("matched_ranges", &self.with_matched_ranges) + .field("normalization", &self.normalization) .field("scheme", &FzfScheme::from_inner(&self.scheme).unwrap()) .finish_non_exhaustive() } @@ -58,6 +62,13 @@ impl FzfV1 { self } + /// TODO: docs + #[inline] + pub fn with_normalization(mut self, normalization: bool) -> Self { + self.normalization = normalization; + self + } + /// TODO: docs #[inline] pub fn with_scoring_scheme(mut self, scheme: FzfScheme) -> Self { @@ -81,6 +92,8 @@ impl Metric for FzfV1 { return None; } + let is_candidate_ascii = candidate.is_ascii(); + let pattern = match query.search_mode { SearchMode::NotExtended(pattern) => pattern, SearchMode::Extended(_) => todo!(), @@ -92,12 +105,23 @@ impl Metric for FzfV1 { CaseSensitivity::Smart => pattern.has_uppercase, }; - let char_eq = utils::char_eq(is_case_sensitive, false); + let char_eq = utils::char_eq(is_case_sensitive, self.normalization); - let range_forward = forward_pass(pattern, candidate, char_eq)?; + let range_forward = forward_pass( + pattern, + candidate, + is_candidate_ascii, + is_case_sensitive, + char_eq, + )?; - let start_backward = - backward_pass(pattern, &candidate[range_forward.clone()], char_eq); + let start_backward = backward_pass( + pattern, + &candidate[range_forward.clone()], + is_candidate_ascii, + is_case_sensitive, + char_eq, + ); let range = range_forward.start + start_backward..range_forward.end; @@ -120,44 +144,73 @@ impl Metric for FzfV1 { #[inline] fn forward_pass( pattern: Pattern, - candidate: &str, + mut candidate: &str, + is_candidate_ascii: bool, + is_case_sensitive: bool, char_eq: CharEq, ) -> Option> { - let mut start_offset = None; + let mut pattern_chars = pattern.chars(); - let mut end_offset = None; + let mut pattern_char = pattern_chars.next()?; - let mut pattern_chars = pattern.chars(); + let (start_offset, matched_char) = utils::find_first( + pattern_char, + candidate, + is_candidate_ascii, + is_case_sensitive, + char_eq, + )?; - let mut pattern_char = pattern_chars.next().expect("pattern is not empty"); + let matched_char_byte_len = matched_char.len_utf8(); - for (offset, candidate_char) in candidate.char_indices() { - if !char_eq(pattern_char, candidate_char) { - continue; - } + let mut end_offset = start_offset + matched_char_byte_len; - if start_offset.is_none() { - start_offset = Some(offset); - } + if let Some(next) = pattern_chars.next() { + pattern_char = next; + } else { + return Some(start_offset..end_offset); + } + + // SAFETY: todo. + candidate = unsafe { candidate.get_unchecked(end_offset..) }; + + loop { + let (byte_offset, matched_char) = utils::find_first( + pattern_char, + candidate, + is_candidate_ascii, + is_case_sensitive, + char_eq, + )?; - let Some(next_target_char) = pattern_chars.next() else { - end_offset = Some(offset + candidate_char.len_utf8()); + let matched_char_byte_len = matched_char.len_utf8(); + + end_offset += byte_offset + matched_char_byte_len; + + if let Some(next) = pattern_chars.next() { + pattern_char = next; + } else { break; - }; + } - pattern_char = next_target_char; + // SAFETY: todo. + candidate = unsafe { + candidate.get_unchecked(byte_offset + matched_char_byte_len..) + }; } - let (Some(start), Some(end)) = (start_offset, end_offset) else { - return None; - }; - - Some(start..end) + Some(start_offset..end_offset) } /// TODO: docs #[inline] -fn backward_pass(pattern: Pattern, candidate: &str, char_eq: CharEq) -> usize { +fn backward_pass( + pattern: Pattern, + mut candidate: &str, + is_candidate_ascii: bool, + is_case_sensitive: bool, + char_eq: CharEq, +) -> usize { // The candidate must start with the first character of the query. debug_assert!(char_eq( candidate.chars().next().unwrap(), @@ -170,23 +223,31 @@ fn backward_pass(pattern: Pattern, candidate: &str, char_eq: CharEq) -> usize { pattern.chars().next_back().unwrap() )); - let mut start_offset = 0; + let start_offset; - let mut query_chars = pattern.chars().rev(); + let mut pattern_chars = pattern.chars().rev(); - let mut query_char = query_chars.next().expect("query is not empty"); + let mut pattern_char = pattern_chars.next().expect("pattern is not empty"); - for (offset, candidate_char) in candidate.char_indices().rev() { - if !char_eq(query_char, candidate_char) { - continue; - } + loop { + let (byte_offset, _) = utils::find_last( + pattern_char, + candidate, + is_candidate_ascii, + is_case_sensitive, + char_eq, + ) + .unwrap(); - let Some(next_query_char) = query_chars.next() else { - start_offset = offset; + if let Some(next) = pattern_chars.next() { + pattern_char = next; + } else { + start_offset = byte_offset; break; - }; + } - query_char = next_query_char; + // SAFETY: todo. + candidate = unsafe { candidate.get_unchecked(..byte_offset) }; } start_offset diff --git a/src/algos/fzf/v2.rs b/src/algos/fzf/v2.rs index 6d1ec5c..8d8a306 100644 --- a/src/algos/fzf/v2.rs +++ b/src/algos/fzf/v2.rs @@ -46,13 +46,6 @@ impl FzfV2 { &self.scheme } - /// TODO: docs - #[inline] - pub fn with_normalization(mut self, normalization: bool) -> Self { - self.normalization = normalization; - self - } - /// TODO: docs #[inline] pub fn with_case_sensitivity( @@ -63,6 +56,13 @@ impl FzfV2 { self } + /// TODO: docs + #[inline] + pub fn with_normalization(mut self, normalization: bool) -> Self { + self.normalization = normalization; + self + } + /// TODO: docs #[inline] pub fn with_matched_ranges(mut self, matched_ranges: bool) -> Self {