Skip to content

Commit

Permalink
Auto merge of rust-lang#115273 - the8472:take-fold, r=cuviper
Browse files Browse the repository at this point in the history
Optimize Take::{fold, for_each} when wrapping TrustedRandomAccess iterators
  • Loading branch information
bors committed Sep 2, 2023
2 parents 9229b1e + f93e125 commit c4f2577
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 19 deletions.
101 changes: 82 additions & 19 deletions library/core/src/iter/adapters/take.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use crate::cmp;
use crate::iter::{adapters::SourceIter, FusedIterator, InPlaceIterable, TrustedLen};
use crate::iter::{
adapters::SourceIter, FusedIterator, InPlaceIterable, TrustedLen, TrustedRandomAccess,
};
use crate::num::NonZeroUsize;
use crate::ops::{ControlFlow, Try};

Expand Down Expand Up @@ -98,26 +100,18 @@ where
}
}

impl_fold_via_try_fold! { fold -> try_fold }

#[inline]
fn for_each<F: FnMut(Self::Item)>(mut self, f: F) {
// The default implementation would use a unit accumulator, so we can
// avoid a stateful closure by folding over the remaining number
// of items we wish to return instead.
fn check<'a, Item>(
mut action: impl FnMut(Item) + 'a,
) -> impl FnMut(usize, Item) -> Option<usize> + 'a {
move |more, x| {
action(x);
more.checked_sub(1)
}
}
fn fold<B, F>(self, init: B, f: F) -> B
where
Self: Sized,
F: FnMut(B, Self::Item) -> B,
{
Self::spec_fold(self, init, f)
}

let remaining = self.n;
if remaining > 0 {
self.iter.try_fold(remaining - 1, check(f));
}
#[inline]
fn for_each<F: FnMut(Self::Item)>(self, f: F) {
Self::spec_for_each(self, f)
}

#[inline]
Expand Down Expand Up @@ -249,3 +243,72 @@ impl<I> FusedIterator for Take<I> where I: FusedIterator {}

#[unstable(feature = "trusted_len", issue = "37572")]
unsafe impl<I: TrustedLen> TrustedLen for Take<I> {}

// Private helper trait that `Take`'s `fold` and `for_each` forward to.
// This file implements it twice for `Take<I>`: once with `default` methods for
// any `I: Iterator`, and once for `I: Iterator + TrustedRandomAccess`.
trait SpecTake: Iterator {
// Consumes `self` and combines its items into an accumulator that starts as
// `init`, applying `f` to the accumulator and each item in turn; returns the
// final accumulator.
fn spec_fold<B, F>(self, init: B, f: F) -> B
where
Self: Sized,
F: FnMut(B, Self::Item) -> B;

// Consumes `self` and calls `f` on each of its items.
fn spec_for_each<F: FnMut(Self::Item)>(self, f: F);
}

impl<I: Iterator> SpecTake for Take<I> {
#[inline]
default fn spec_fold<B, F>(mut self, init: B, f: F) -> B
where
Self: Sized,
F: FnMut(B, Self::Item) -> B,
{
use crate::ops::NeverShortCircuit;
self.try_fold(init, NeverShortCircuit::wrap_mut_2(f)).0
}

#[inline]
default fn spec_for_each<F: FnMut(Self::Item)>(mut self, f: F) {
// The default implementation would use a unit accumulator, so we can
// avoid a stateful closure by folding over the remaining number
// of items we wish to return instead.
fn check<'a, Item>(
mut action: impl FnMut(Item) + 'a,
) -> impl FnMut(usize, Item) -> Option<usize> + 'a {
move |more, x| {
action(x);
more.checked_sub(1)
}
}

let remaining = self.n;
if remaining > 0 {
self.iter.try_fold(remaining - 1, check(f));
}
}
}

// `SpecTake` implementation for inner iterators that are `TrustedRandomAccess`:
// items are fetched by index in a counted loop instead of going through
// `try_fold`.
impl<I: Iterator + TrustedRandomAccess> SpecTake for Take<I> {
    #[inline]
    fn spec_fold<B, F>(mut self, init: B, mut f: F) -> B
    where
        Self: Sized,
        F: FnMut(B, Self::Item) -> B,
    {
        // Visit at most `n` items, and never more than the inner iterator reports.
        let limit = usize::min(self.n, self.iter.size());
        let mut state = init;
        for idx in 0..limit {
            // SAFETY: idx < limit <= self.iter.size() and we discard the iterator at the end
            let item = unsafe { self.iter.__iterator_get_unchecked(idx) };
            state = f(state, item);
        }
        state
    }

    #[inline]
    fn spec_for_each<F: FnMut(Self::Item)>(mut self, mut f: F) {
        // Visit at most `n` items, and never more than the inner iterator reports.
        let limit = usize::min(self.n, self.iter.size());
        for idx in 0..limit {
            // SAFETY: idx < limit <= self.iter.size() and we discard the iterator at the end
            let item = unsafe { self.iter.__iterator_get_unchecked(idx) };
            f(item);
        }
    }
}
15 changes: 15 additions & 0 deletions tests/codegen/lib-optimizations/iter-sum.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// ignore-debug: the debug assertions get in the way
// compile-flags: -O
// only-x86_64 (vectorization varies between architectures)
#![crate_type = "lib"]


// Ensure that slice + take + sum gets vectorized.
// Currently this relies on the slice::Iter::try_fold implementation
// CHECK-LABEL: @slice_take_sum
// Codegen fixture: sums at most the first `l` elements of `s` using the
// `iter().take(l).sum()` chain whose optimized output is being inspected.
// NOTE(review): `#[no_mangle]` presumably keeps the symbol name stable so the
// label directive preceding this function can match it — confirm.
#[no_mangle]
pub fn slice_take_sum(s: &[u64], l: usize) -> u64 {
// The two directives below look for a `vector.body` block (i.e. a vectorized
// loop) followed by the function's return in the emitted IR.
// CHECK: vector.body:
// CHECK: ret
s.iter().take(l).sum()
}

0 comments on commit c4f2577

Please sign in to comment.