Skip to content

Commit

Permalink
cmp iters
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian committed May 3, 2023
1 parent 20a4655 commit 0bee697
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 17 deletions.
29 changes: 12 additions & 17 deletions components/segmenter/src/complex/lstm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,24 +155,19 @@ impl<'l> LstmSegmenter<'l> {
return self.dic.len() as u16;
};

// The maximum UTF-8 size of a grapheme cluster seems to be 41 bytes
let mut i = 0;
let mut buf = [0; 41];

#[allow(clippy::unwrap_used)]
// debug_asserting whether my assumption is correct
decode_utf16(grapheme_cluster.iter().copied()).for_each(|c| {
debug_assert!(i < 37);
i += c
.unwrap_or(REPLACEMENT_CHARACTER)
.encode_utf8(&mut buf[i..])
.len()
});

#[allow(clippy::unwrap_used)]
// debug_asserting whether my assumption is correct
self.dic
.get_copied(UnvalidatedStr::from_bytes(&buf[..i]))
.get_copied_by(|key| {
key.as_bytes().iter().copied().cmp(
decode_utf16(grapheme_cluster.iter().copied()).flat_map(|c| {
let mut buf = [0; 4];
let len = c
.unwrap_or(REPLACEMENT_CHARACTER)
.encode_utf8(&mut buf)
.len();
buf.into_iter().take(len)
}),
)
})
.unwrap_or_else(|| self.dic.len() as u16)
})
.collect()
Expand Down
6 changes: 6 additions & 0 deletions utils/zerovec/src/map/borrowed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,12 @@ where
self.values.get(index)
}

/// Looks up a value using a caller-supplied comparison instead of a concrete key, and
/// returns it as a direct copy of `V` instead of `V::ULE` (for cases when `V` is fixed-size).
///
/// `predicate` is handed to a binary search over the keys. Returns `None` when that
/// search does not report a match, or when no value is available at the matched index.
// NOTE(review): presumably `predicate` must be consistent with the order in which the keys
// are stored, as with any binary search — confirm against `zvl_binary_search_by`.
pub fn get_copied_by(&self, predicate: impl FnMut(&K) -> Ordering) -> Option<V> {
    self.keys
        .zvl_binary_search_by(predicate)
        .ok()
        .and_then(|position| self.values.get(position))
}

/// Similar to [`Self::iter()`] except it returns a direct copy of the values instead of references
/// to `V::ULE`, in cases when `V` is fixed-size
pub fn iter_copied_values<'b>(
Expand Down

0 comments on commit 0bee697

Please sign in to comment.