From dcd3b9c62c5f5fae26409f58c003f8f0e8839987 Mon Sep 17 00:00:00 2001
From: Manish Goregaokar
Date: Wed, 18 Dec 2024 15:10:54 -0800
Subject: [PATCH] Add VarZeroSliceIter

---
 utils/zerovec/src/lib.rs                   |   4 +-
 utils/zerovec/src/varzerovec/components.rs | 102 ++++++++++++++++-----
 utils/zerovec/src/varzerovec/mod.rs        |   2 +-
 utils/zerovec/src/varzerovec/slice.rs      |   4 +-
 4 files changed, 83 insertions(+), 29 deletions(-)

diff --git a/utils/zerovec/src/lib.rs b/utils/zerovec/src/lib.rs
index eabf1144c8c..19d697845af 100644
--- a/utils/zerovec/src/lib.rs
+++ b/utils/zerovec/src/lib.rs
@@ -292,7 +292,9 @@ pub mod vecs {
     #[doc(no_inline)]
     pub use crate::varzerovec::{VarZeroSlice, VarZeroVec};
 
-    pub use crate::varzerovec::{Index16, Index32, Index8, VarZeroVecFormat, VarZeroVecOwned};
+    pub use crate::varzerovec::{
+        Index16, Index32, Index8, VarZeroSliceIter, VarZeroVecFormat, VarZeroVecOwned,
+    };
 
     pub type VarZeroVec16<'a, T> = VarZeroVec<'a, T, Index16>;
     pub type VarZeroVec32<'a, T> = VarZeroVec<'a, T, Index32>;
diff --git a/utils/zerovec/src/varzerovec/components.rs b/utils/zerovec/src/varzerovec/components.rs
index 890a58260c2..9b7d81a6b82 100644
--- a/utils/zerovec/src/varzerovec/components.rs
+++ b/utils/zerovec/src/varzerovec/components.rs
@@ -475,31 +475,8 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>
 
     /// Create an iterator over the Ts contained in VarZeroVecComponents
     #[inline]
-    pub fn iter(self) -> impl Iterator<Item = &'a T> {
-        // The indices array doesn't contain 0 or len, we need to graft it on
-        // However we don't want to graft it on for an empty vector.
-        let (begin, end) = if self.is_empty() {
-            (None, None)
-        } else {
-            (Some(0), Some(self.things.len()))
-        };
-        begin
-            .into_iter()
-            .chain(
-                self.indices_slice()
-                    .iter()
-                    .copied()
-                    .map(IntegerULE::iule_to_usize),
-            )
-            .zip(
-                self.indices_slice()
-                    .iter()
-                    .copied()
-                    .map(IntegerULE::iule_to_usize)
-                    .chain(end),
-            )
-            .map(move |(start, end)| unsafe { self.things.get_unchecked(start..end) })
-            .map(|bytes| unsafe { T::from_bytes_unchecked(bytes) })
+    pub fn iter(self) -> VarZeroSliceIter<'a, T, F> {
+        VarZeroSliceIter::new(self)
     }
 
     pub fn to_vec(self) -> Vec<Box<T>> {
@@ -524,6 +501,81 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>
     }
 }
 
+/// An iterator over VarZeroSlice
+#[derive(Debug)]
+pub struct VarZeroSliceIter<'a, T: ?Sized, F> {
+    components: VarZeroVecComponents<'a, T, F>,
+    index: usize,
+    // Safety invariant: must be a valid index into the data segment of `components`, or an index at the end
+    // i.e. start_index <= components.things.len()
+    //
+    // It must be a valid index into the `things` array of components, coming from `components.indices_slice()`
+    start_index: usize,
+}
+
+impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSliceIter<'a, T, F> {
+    fn new(c: VarZeroVecComponents<'a, T, F>) -> Self {
+        Self {
+            components: c,
+            index: 0,
+            // Invariant upheld, 0 is always a valid index-or-end
+            start_index: 0,
+        }
+    }
+}
+impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> Iterator for VarZeroSliceIter<'a, T, F> {
+    type Item = &'a T;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Note: the indices array doesn't contain 0 or len, we need to specially handle those edges. The 0 is handled
+        // by start_index, and the len is handled by the code for `end`.
+
+        if self.index >= self.components.len() {
+            return None;
+        }
+
+        // Invariant established: self.index is in bounds for self.components.len(),
+        // which means it is in bounds for self.components.indices_slice() since that has the same length
+
+        let end = if self.index + 1 == self.components.len() {
+            // We don't store the end index since it is computable, so the last element should use self.components.things.len()
+            self.components.things.len()
+        } else {
+            // Safety: self.index was known to be in bounds from the bounds check above.
+            unsafe {
+                self.components
+                    .indices_slice()
+                    .get_unchecked(self.index)
+                    .iule_to_usize()
+            }
+        };
+        // Invariant established: end has the same invariant as self.start_index since it comes from indices_slice, which is guaranteed
+        // to only contain valid indexes
+
+        let item = unsafe {
+            // Safety: self.start_index and end both have in-range invariants, plus they are valid indices from indices_slice
+            // which means we can treat this data as a T
+            T::from_bytes_unchecked(self.components.things.get_unchecked(self.start_index..end))
+        };
+        self.index += 1;
+        // Invariant upheld: end has the same invariant as self.start_index
+        self.start_index = end;
+        Some(item)
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let remainder = self.components.len() - self.index;
+        (remainder, Some(remainder))
+    }
+}
+
+impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> ExactSizeIterator for VarZeroSliceIter<'a, T, F> {
+    fn len(&self) -> usize {
+        // Remaining items, not the total: this must keep agreeing with `size_hint` after `next` has been called.
+        self.components.len() - self.index
+    }
+}
+
 impl<'a, T, F> VarZeroVecComponents<'a, T, F>
 where
     T: VarULE,
diff --git a/utils/zerovec/src/varzerovec/mod.rs b/utils/zerovec/src/varzerovec/mod.rs
index 8967bc6887f..94ad5ab69e8 100644
--- a/utils/zerovec/src/varzerovec/mod.rs
+++ b/utils/zerovec/src/varzerovec/mod.rs
@@ -23,7 +23,7 @@ pub use crate::{VarZeroSlice, VarZeroVec};
 #[doc(hidden)]
 pub use components::VarZeroVecComponents;
 
-pub use components::{Index16, Index32, Index8, VarZeroVecFormat};
+pub use components::{Index16, Index32, Index8, VarZeroSliceIter, VarZeroVecFormat};
 
 pub use owned::VarZeroVecOwned;
 
diff --git a/utils/zerovec/src/varzerovec/slice.rs b/utils/zerovec/src/varzerovec/slice.rs
index 42e66316f63..03801042f0a 100644
--- a/utils/zerovec/src/varzerovec/slice.rs
+++ b/utils/zerovec/src/varzerovec/slice.rs
@@ -2,7 +2,7 @@
 // called LICENSE at the top level of the ICU4X source tree
 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
 
-use super::components::VarZeroVecComponents;
+use super::components::{VarZeroSliceIter, VarZeroVecComponents};
 use super::vec::VarZeroVecInner;
 use super::*;
 use crate::ule::*;
@@ -179,7 +179,7 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
     /// assert_eq!(iter_results[2], "baz");
     /// assert_eq!(iter_results[3], "quux");
     /// ```
-    pub fn iter<'b>(&'b self) -> impl Iterator<Item = &'b T> {
+    pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> {
         self.as_components().iter()
     }
 