Skip to content

Zip implementation optimization using TrustedRandomAccess can remove observable side-effects. #85890

@steffahn

Description

@steffahn
fn f(x: impl Iterator) {
    let y = x.zip(<[()]>::iter(&[]));
    let mut z = y.zip(<[()]>::iter(&[]));
    z.next();
}

fn main() {
    let x = [1, 2, 3].iter().map(|_| println!("  hi!"));
    println!("first:");
    f(Box::new(x.clone())); // prints "hi!" (Box doesn’t implement TrustedRandomAccess)
    println!("second:");
    f(x.clone()); // doesn’t print "hi!" (Cloned<slice::Iter> implements TrustedRandomAccess)
    println!("done.");
}
first:
  hi!
second:
done.

(playground)

This is a flaw in the current specialized implementation of next().

impl<A, B> ZipImpl<A, B> for Zip<A, B>
where
    A: TrustedRandomAccess + Iterator,
    B: TrustedRandomAccess + Iterator,
{
    #[inline]
    fn next(&mut self) -> Option<(A::Item, B::Item)> {
        if self.index < self.len {
            let i = self.index;
            self.index += 1;
            // SAFETY: `i` is smaller than `self.len`, thus smaller than `self.a.len()` and `self.b.len()`
            unsafe {
                Some((self.a.__iterator_get_unchecked(i), self.b.__iterator_get_unchecked(i)))
            }
        } else if A::MAY_HAVE_SIDE_EFFECT && self.index < self.a_len {
            let i = self.index;
            self.index += 1;
            self.len += 1;
            // match the base implementation's potential side effects
            // SAFETY: we just checked that `i` < `self.a.len()`
            unsafe {
                self.a.__iterator_get_unchecked(i);
            }
            None
        } else {
            None
        }
    }}

This implementation makes sure to keep trying to evaluate the first iterator in the Zip like the short-circuiting default implementation

impl<A, B> ZipImpl<A, B> for Zip<A, B>
where
   A: Iterator,
   B: Iterator,
{
    #[inline]
   default fn next(&mut self) -> Option<(A::Item, B::Item)> {
       let x = self.a.next()?;
       let y = self.b.next()?;
       Some((x, y))
   }}

However, the default implementation does it differently: It keeps polling the first iterator even if it’s empty as indicated by its size_hint, whereas the TrustedRandomAccess-using implementation stops as soon as the first iterator is known to be empty based on size calculations.

@rustbot label T-libs-impl, A-iterators

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-iteratorsArea: IteratorsC-bugCategory: This is a bug.T-libsRelevant to the library team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions