Skip to content

Commit

Permalink
Add VarZeroSliceIter
Browse files Browse the repository at this point in the history
  • Loading branch information
Manishearth committed Dec 18, 2024
1 parent 370ade0 commit dcd3b9c
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 29 deletions.
4 changes: 3 additions & 1 deletion utils/zerovec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,9 @@ pub mod vecs {
#[doc(no_inline)]
pub use crate::varzerovec::{VarZeroSlice, VarZeroVec};

pub use crate::varzerovec::{Index16, Index32, Index8, VarZeroVecFormat, VarZeroVecOwned};
pub use crate::varzerovec::{
Index16, Index32, Index8, VarZeroSliceIter, VarZeroVecFormat, VarZeroVecOwned,
};

pub type VarZeroVec16<'a, T> = VarZeroVec<'a, T, Index16>;
pub type VarZeroVec32<'a, T> = VarZeroVec<'a, T, Index32>;
Expand Down
101 changes: 76 additions & 25 deletions utils/zerovec/src/varzerovec/components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,31 +475,8 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>

/// Create an iterator over the Ts contained in VarZeroVecComponents
#[inline]
pub fn iter(self) -> impl Iterator<Item = &'a T> {
// The indices array doesn't contain 0 or len, we need to graft it on
// However we don't want to graft it on for an empty vector.
let (begin, end) = if self.is_empty() {
(None, None)
} else {
(Some(0), Some(self.things.len()))
};
begin
.into_iter()
.chain(
self.indices_slice()
.iter()
.copied()
.map(IntegerULE::iule_to_usize),
)
.zip(
self.indices_slice()
.iter()
.copied()
.map(IntegerULE::iule_to_usize)
.chain(end),
)
.map(move |(start, end)| unsafe { self.things.get_unchecked(start..end) })
.map(|bytes| unsafe { T::from_bytes_unchecked(bytes) })
pub fn iter(self) -> VarZeroSliceIter<'a, T, F> {
VarZeroSliceIter::new(self)
}

pub fn to_vec(self) -> Vec<Box<T>> {
Expand All @@ -524,6 +501,80 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>
}
}

/// Iterator that walks the elements of a `VarZeroVecComponents` in order,
/// yielding each one as a `&'a T`.
///
/// Values of this type are produced by `VarZeroVecComponents::iter()`.
#[derive(Debug)]
pub struct VarZeroSliceIter<'a, T, F>
where
    T: ?Sized,
{
    /// The parsed buffer being iterated.
    components: VarZeroVecComponents<'a, T, F>,
    /// Number of elements already yielded, i.e. the position of the element
    /// that the next call to `next()` will produce.
    index: usize,
    // Safety invariant: `start_index <= components.things.len()` at all times.
    //
    // It is the byte offset inside `components.things` at which the next element
    // begins (or the end of `things` once iteration is finished). It only ever
    // holds 0, a value read from `components.indices_slice()`, or
    // `components.things.len()`, so it always sits on an element boundary.
    start_index: usize,
}

impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSliceIter<'a, T, F> {
    /// Builds an iterator positioned before the first element of `components`.
    fn new(components: VarZeroVecComponents<'a, T, F>) -> Self {
        // Starting at element 0 / byte offset 0 upholds the `start_index`
        // invariant: 0 is always either a valid offset into the data segment
        // or (for an empty data segment) its end.
        Self {
            components,
            index: 0,
            start_index: 0,
        }
    }
}
impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> Iterator for VarZeroSliceIter<'a, T, F> {
    type Item = &'a T;

    /// Yields the next element, or `None` once every element has been produced.
    fn next(&mut self) -> Option<Self::Item> {
        // The indices array stores neither the leading 0 nor the trailing
        // `things.len()`. The leading 0 is supplied by the initial value of
        // `start_index`; the trailing length is supplied by the `is_last` branch.
        let count = self.components.len();
        if self.index >= count {
            return None;
        }

        // From here on `current < count`.
        let current = self.index;
        let begin = self.start_index;
        let is_last = current + 1 == count;

        let end = if is_last {
            // The end offset of the final element is not stored because it can
            // be computed: it is simply the end of the data segment.
            self.components.things.len()
        } else {
            // SAFETY: `current` passed the bounds check against `count` above and
            // this is not the last element, so `current` is in bounds for
            // `indices_slice()`.
            unsafe {
                self.components
                    .indices_slice()
                    .get_unchecked(current)
                    .iule_to_usize()
            }
        };
        // `end` now satisfies the same invariant as `start_index`: it is either
        // `things.len()` or a value from `indices_slice()`, which is guaranteed
        // to contain only valid offsets.

        // SAFETY: `begin` (via the `start_index` invariant) and `end` are both
        // in-range offsets into `things`.
        let bytes = unsafe { self.components.things.get_unchecked(begin..end) };
        // SAFETY: `begin..end` is delimited by offsets taken from the index
        // data, so these bytes are exactly one encoded `T`.
        let item = unsafe { T::from_bytes_unchecked(bytes) };

        self.index = current + 1;
        // Invariant upheld: `end` satisfies the `start_index` invariant.
        self.start_index = end;
        Some(item)
    }

    /// Reports the exact number of elements still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.components.len() - self.index;
        (remaining, Some(remaining))
    }
}

impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> ExactSizeIterator for VarZeroSliceIter<'a, T, F> {
    /// Returns the number of elements that have not been yielded yet.
    ///
    /// `ExactSizeIterator::len` must report the *remaining* length and agree
    /// with `size_hint()`, so the elements already consumed (`self.index`) are
    /// subtracted from the total element count.
    fn len(&self) -> usize {
        // `next()` only advances `index` while it is below `components.len()`,
        // so this subtraction cannot underflow.
        self.components.len() - self.index
    }
}

impl<'a, T, F> VarZeroVecComponents<'a, T, F>
where
T: VarULE,
Expand Down
2 changes: 1 addition & 1 deletion utils/zerovec/src/varzerovec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub use crate::{VarZeroSlice, VarZeroVec};
#[doc(hidden)]
pub use components::VarZeroVecComponents;

pub use components::{Index16, Index32, Index8, VarZeroVecFormat};
pub use components::{Index16, Index32, Index8, VarZeroSliceIter, VarZeroVecFormat};

pub use owned::VarZeroVecOwned;

Expand Down
4 changes: 2 additions & 2 deletions utils/zerovec/src/varzerovec/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use super::components::VarZeroVecComponents;
use super::components::{VarZeroSliceIter, VarZeroVecComponents};
use super::vec::VarZeroVecInner;
use super::*;
use crate::ule::*;
Expand Down Expand Up @@ -179,7 +179,7 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
/// assert_eq!(iter_results[2], "baz");
/// assert_eq!(iter_results[3], "quux");
/// ```
pub fn iter<'b>(&'b self) -> impl Iterator<Item = &'b T> {
pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> {
self.as_components().iter()
}

Expand Down

0 comments on commit dcd3b9c

Please sign in to comment.