Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add VarZeroSliceIter, ZeroSliceIter #5924

Merged
merged 3 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion utils/zerovec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -289,10 +289,14 @@ pub mod vecs {
#[doc(no_inline)]
pub use crate::zerovec::{ZeroSlice, ZeroVec};

pub use crate::zerovec::ZeroSliceIter;

#[doc(no_inline)]
pub use crate::varzerovec::{VarZeroSlice, VarZeroVec};

pub use crate::varzerovec::{Index16, Index32, Index8, VarZeroVecFormat, VarZeroVecOwned};
pub use crate::varzerovec::{
Index16, Index32, Index8, VarZeroSliceIter, VarZeroVecFormat, VarZeroVecOwned,
};

pub type VarZeroVec16<'a, T> = VarZeroVec<'a, T, Index16>;
pub type VarZeroVec32<'a, T> = VarZeroVec<'a, T, Index32>;
Expand Down
101 changes: 76 additions & 25 deletions utils/zerovec/src/varzerovec/components.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,31 +475,8 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>

/// Create an iterator over the Ts contained in VarZeroVecComponents
#[inline]
pub fn iter(self) -> impl Iterator<Item = &'a T> {
// The indices array doesn't contain 0 or len, we need to graft it on
// However we don't want to graft it on for an empty vector.
let (begin, end) = if self.is_empty() {
(None, None)
} else {
(Some(0), Some(self.things.len()))
};
begin
.into_iter()
.chain(
self.indices_slice()
.iter()
.copied()
.map(IntegerULE::iule_to_usize),
)
.zip(
self.indices_slice()
.iter()
.copied()
.map(IntegerULE::iule_to_usize)
.chain(end),
)
.map(move |(start, end)| unsafe { self.things.get_unchecked(start..end) })
.map(|bytes| unsafe { T::from_bytes_unchecked(bytes) })
pub fn iter(self) -> VarZeroSliceIter<'a, T, F> {
VarZeroSliceIter::new(self)
}

pub fn to_vec(self) -> Vec<Box<T>> {
Expand All @@ -524,6 +501,80 @@ impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroVecComponents<'a, T, F>
}
}

/// An iterator over the elements of a `VarZeroSlice`, yielding `&'a T`.
///
/// Returned by `VarZeroVecComponents::iter` and `VarZeroSlice::iter`.
#[derive(Debug)]
pub struct VarZeroSliceIter<'a, T: ?Sized, F> {
// The components (indices + data segment) being iterated over.
components: VarZeroVecComponents<'a, T, F>,
// Position of the next element to yield; incremented by one on every successful `next()`.
index: usize,
// Offset into `components.things` at which the next element to yield begins.
//
// Safety invariant: must be a valid index into the data segment (`things`) of `components`,
// or an index at the end, i.e. start_index <= components.things.len()
//
// It starts at 0 and is afterwards only ever set to the `end` offset computed in `next()`,
// which is either a value read from `components.indices_slice()` or `components.things.len()`.
start_index: usize,
}

impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSliceIter<'a, T, F> {
    /// Builds an iterator positioned before the first element of `c`.
    fn new(c: VarZeroVecComponents<'a, T, F>) -> Self {
        // Nothing has been yielded yet.
        let index = 0;
        // Invariant upheld: offset 0 is always a valid index-or-end into the data segment.
        let start_index = 0;
        Self {
            components: c,
            index,
            start_index,
        }
    }
}
impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> Iterator for VarZeroSliceIter<'a, T, F> {
type Item = &'a T;

fn next(&mut self) -> Option<Self::Item> {
    // Note: the indices array stores neither the leading 0 nor the trailing length, so both
    // edges are handled here. The leading 0 comes from the initial `start_index`, and the
    // trailing length comes from `things.len()` when yielding the final element.
    let position = self.index;
    if position >= self.components.len() {
        // Exhausted (this also covers the empty case).
        return None;
    }

    // Invariant established: `position` is in bounds for `self.components.len()`.
    let start = self.start_index;
    let is_last = position + 1 == self.components.len();

    let end = if is_last {
        // The end offset of the last element is not stored since it is computable:
        // it is simply the end of the data segment.
        self.components.things.len()
    } else {
        // Safety: `position` passed the bounds check above and is not the final element,
        // which the iterator relies on to make it a valid index into `indices_slice()`.
        unsafe {
            self.components
                .indices_slice()
                .get_unchecked(position)
                .iule_to_usize()
        }
    };
    // Invariant established: `end` satisfies the same invariant as `start_index`, since it is
    // either `things.len()` or comes from `indices_slice()`, which is guaranteed to only
    // contain valid indexes.

    // Safety: `start` and `end` both carry the in-range invariant and delimit one element of
    // the data segment, so the bytes between them can be treated as a `T`.
    let item = unsafe { T::from_bytes_unchecked(self.components.things.get_unchecked(start..end)) };

    // Invariant upheld: `end` has the same invariant as `start_index`.
    self.start_index = end;
    self.index = position + 1;
    Some(item)
}

fn size_hint(&self) -> (usize, Option<usize>) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Praise: Thanks for adding size_hint; this is an improvement over what we had before, I think

let remainder = self.components.len() - self.index;
(remainder, Some(remainder))
}
}

impl<'a, T: VarULE + ?Sized, F: VarZeroVecFormat> ExactSizeIterator for VarZeroSliceIter<'a, T, F> {
    /// Number of elements that have not been yielded yet.
    fn len(&self) -> usize {
        let total = self.components.len();
        let consumed = self.index;
        total - consumed
    }
}

impl<'a, T, F> VarZeroVecComponents<'a, T, F>
where
T: VarULE,
Expand Down
2 changes: 1 addition & 1 deletion utils/zerovec/src/varzerovec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ pub use crate::{VarZeroSlice, VarZeroVec};
#[doc(hidden)]
pub use components::VarZeroVecComponents;

pub use components::{Index16, Index32, Index8, VarZeroVecFormat};
pub use components::{Index16, Index32, Index8, VarZeroSliceIter, VarZeroVecFormat};

pub use owned::VarZeroVecOwned;

Expand Down
4 changes: 2 additions & 2 deletions utils/zerovec/src/varzerovec/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use super::components::VarZeroVecComponents;
use super::components::{VarZeroSliceIter, VarZeroVecComponents};
use super::vec::VarZeroVecInner;
use super::*;
use crate::ule::*;
Expand Down Expand Up @@ -179,7 +179,7 @@ impl<T: VarULE + ?Sized, F: VarZeroVecFormat> VarZeroSlice<T, F> {
/// assert_eq!(iter_results[2], "baz");
/// assert_eq!(iter_results[3], "quux");
/// ```
pub fn iter<'b>(&'b self) -> impl Iterator<Item = &'b T> {
pub fn iter<'b>(&'b self) -> VarZeroSliceIter<'b, T, F> {
self.as_components().iter()
}

Expand Down
1 change: 1 addition & 0 deletions utils/zerovec/src/zerovec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ mod serde;
mod slice;

pub use slice::ZeroSlice;
pub use slice::ZeroSliceIter;

use crate::ule::*;
use alloc::borrow::Cow;
Expand Down
27 changes: 25 additions & 2 deletions utils/zerovec/src/zerovec/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,8 +363,8 @@ where
/// assert_eq!(it.next(), None);
/// ```
#[inline]
pub fn iter(&self) -> impl DoubleEndedIterator<Item = T> + ExactSizeIterator<Item = T> + '_ {
self.as_ule_slice().iter().copied().map(T::from_unaligned)
pub fn iter<'a>(&'a self) -> ZeroSliceIter<'a, T> {
ZeroSliceIter(self.as_ule_slice().iter())
}

/// Returns a tuple with the first element and a subslice of the remaining elements.
Expand Down Expand Up @@ -401,6 +401,29 @@ where
}
}

/// An iterator over the elements of a `ZeroSlice`.
///
/// Wraps an iterator over the underlying unaligned `T::ULE` values; each one is converted
/// to a `T` with `T::from_unaligned` as it is yielded. Returned by `ZeroSlice::iter`.
#[derive(Debug)]
pub struct ZeroSliceIter<'a, T: AsULE>(core::slice::Iter<'a, T::ULE>);

impl<'a, T: AsULE> Iterator for ZeroSliceIter<'a, T> {
    type Item = T;

    /// Yields the next element, converting it from its unaligned representation.
    fn next(&mut self) -> Option<T> {
        self.0.next().copied().map(T::from_unaligned)
    }

    /// Reports the exact number of remaining elements.
    ///
    /// This type also implements `ExactSizeIterator`, whose contract requires `size_hint`
    /// to be exact; the default `(0, None)` would break that contract and would stop
    /// `collect`/`extend` from pre-allocating. The wrapped slice iterator already knows
    /// the exact count, so forward to it.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}

impl<'a, T: AsULE> ExactSizeIterator for ZeroSliceIter<'a, T> {
    /// Number of elements left to yield, as reported by the wrapped slice iterator.
    fn len(&self) -> usize {
        let inner = &self.0;
        ExactSizeIterator::len(inner)
    }
}

impl<'a, T: AsULE> DoubleEndedIterator for ZeroSliceIter<'a, T> {
    /// Yields the last remaining element, converting it from its unaligned representation.
    fn next_back(&mut self) -> Option<T> {
        let unaligned = *self.0.next_back()?;
        Some(T::from_unaligned(unaligned))
    }
}

impl<T> ZeroSlice<T>
where
T: AsULE + Ord,
Expand Down
Loading