Skip to content

Commit 8b32f79

Browse files
committed
Auto merge of #52910 - ljedrz:fix_48994, r=<try>
[WIP] Calculate capacity when collecting into Option and Result I was browsing the [perf page](http://perf.rust-lang.org) to see the impact of my recent changes (e.g. #52697) and I was surprised that some of the results were not as awesome as I expected. I dug some more and found an issue that is the probable culprit: [Collecting into a Result<Vec<_>> doesn't reserve the capacity in advance](#48994). Collecting into `Option` or `Result` might result in an empty collection, but there is no reason why we shouldn't provide a non-zero lower bound when we know the `Iterator` we are collecting from doesn't contain any `None` or `Err`. We know this, because the `Adapter` iterator used in the `FromIterator` implementations for `Option` and `Result` registers if any `None` or `Err` are present in the `Iterator` in question; we can use this information and return a more accurate lower bound in case we know it won't be equal to zero. I [have benchmarked](https://gist.github.com/ljedrz/c2fcc19f6260976ae7a46ae47aa71fb5) collecting into `Option` and `Result` using the current implementation and one with the proposed changes; I have also benchmarked a push loop with a known capacity as a reference that should be slower than using `FromIterator` (i.e. `collect()`). The results are quite promising: ``` test bench_collect_to_option_new ... bench: 246 ns/iter (+/- 23) test bench_collect_to_option_old ... bench: 954 ns/iter (+/- 54) test bench_collect_to_result_new ... bench: 250 ns/iter (+/- 25) test bench_collect_to_result_old ... bench: 939 ns/iter (+/- 104) test bench_push_loop_to_option ... bench: 294 ns/iter (+/- 21) test bench_push_loop_to_result ... bench: 303 ns/iter (+/- 29) ``` Fixes #48994.
2 parents 23d8d0c + 5762e67 commit 8b32f79

File tree

13 files changed

+72
-23
lines changed

13 files changed

+72
-23
lines changed

src/liballoc/collections/binary_heap.rs

+2-4
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@
146146
#![stable(feature = "rust1", since = "1.0.0")]
147147

148148
use core::ops::{Deref, DerefMut};
149-
use core::iter::{FromIterator, FusedIterator};
149+
use core::iter::{FromIterator, FusedIterator, OptimisticCollect};
150150
use core::mem::{swap, size_of, ManuallyDrop};
151151
use core::ptr;
152152
use core::fmt;
@@ -1168,9 +1168,7 @@ impl<T: Ord> SpecExtend<BinaryHeap<T>> for BinaryHeap<T> {
11681168
impl<T: Ord> BinaryHeap<T> {
11691169
fn extend_desugared<I: IntoIterator<Item = T>>(&mut self, iter: I) {
11701170
let iterator = iter.into_iter();
1171-
let (lower, _) = iterator.size_hint();
1172-
1173-
self.reserve(lower);
1171+
self.reserve(iterator.optimistic_collect_count());
11741172

11751173
for elem in iterator {
11761174
self.push(elem);

src/liballoc/collections/vec_deque.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
use core::cmp::Ordering;
1111
use core::fmt;
12-
use core::iter::{repeat_with, FromIterator, FusedIterator};
12+
use core::iter::{repeat_with, FromIterator, FusedIterator, OptimisticCollect};
1313
use core::mem;
1414
use core::ops::Bound::{Excluded, Included, Unbounded};
1515
use core::ops::{Index, IndexMut, RangeBounds, Try};
@@ -2597,8 +2597,7 @@ impl<A> IndexMut<usize> for VecDeque<A> {
25972597
impl<A> FromIterator<A> for VecDeque<A> {
25982598
fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> VecDeque<A> {
25992599
let iterator = iter.into_iter();
2600-
let (lower, _) = iterator.size_hint();
2601-
let mut deq = VecDeque::with_capacity(lower);
2600+
let mut deq = VecDeque::with_capacity(iterator.optimistic_collect_count());
26022601
deq.extend(iterator);
26032602
deq
26042603
}

src/liballoc/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
#![feature(maybe_uninit)]
115115
#![feature(alloc_layout_extra)]
116116
#![feature(try_trait)]
117+
#![feature(optimistic_collect)]
117118

118119
// Allow testing this library
119120

src/liballoc/string.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
use core::char::{decode_utf16, REPLACEMENT_CHARACTER};
5050
use core::fmt;
5151
use core::hash;
52-
use core::iter::{FromIterator, FusedIterator};
52+
use core::iter::{FromIterator, FusedIterator, OptimisticCollect};
5353
use core::ops::Bound::{Excluded, Included, Unbounded};
5454
use core::ops::{self, Add, AddAssign, Index, IndexMut, RangeBounds};
5555
use core::ptr;
@@ -1760,8 +1760,7 @@ impl<'a> FromIterator<Cow<'a, str>> for String {
17601760
impl Extend<char> for String {
17611761
fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) {
17621762
let iterator = iter.into_iter();
1763-
let (lower_bound, _) = iterator.size_hint();
1764-
self.reserve(lower_bound);
1763+
self.reserve(iterator.optimistic_collect_count());
17651764
iterator.for_each(move |c| self.push(c));
17661765
}
17671766
}

src/liballoc/vec.rs

+7-8
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ use core::cmp::{self, Ordering};
6060
use core::fmt;
6161
use core::hash::{self, Hash};
6262
use core::intrinsics::{arith_offset, assume};
63-
use core::iter::{FromIterator, FusedIterator, TrustedLen};
63+
use core::iter::{FromIterator, FusedIterator, TrustedLen, OptimisticCollect};
6464
use core::marker::PhantomData;
6565
use core::mem;
6666
use core::ops::Bound::{Excluded, Included, Unbounded};
@@ -1813,8 +1813,8 @@ impl<T, I> SpecExtend<T, I> for Vec<T>
18131813
let mut vector = match iterator.next() {
18141814
None => return Vec::new(),
18151815
Some(element) => {
1816-
let (lower, _) = iterator.size_hint();
1817-
let mut vector = Vec::with_capacity(lower.saturating_add(1));
1816+
let mut vector = Vec::with_capacity(
1817+
iterator.optimistic_collect_count().saturating_add(1));
18181818
unsafe {
18191819
ptr::write(vector.get_unchecked_mut(0), element);
18201820
vector.set_len(1);
@@ -1933,8 +1933,7 @@ impl<T> Vec<T> {
19331933
while let Some(element) = iterator.next() {
19341934
let len = self.len();
19351935
if len == self.capacity() {
1936-
let (lower, _) = iterator.size_hint();
1937-
self.reserve(lower.saturating_add(1));
1936+
self.reserve(iterator.optimistic_collect_count().saturating_add(1));
19381937
}
19391938
unsafe {
19401939
ptr::write(self.get_unchecked_mut(len), element);
@@ -2589,9 +2588,9 @@ impl<'a, I: Iterator> Drop for Splice<'a, I> {
25892588

25902589
// There may be more elements. Use the lower bound as an estimate.
25912590
// FIXME: Is the upper bound a better guess? Or something else?
2592-
let (lower_bound, _upper_bound) = self.replace_with.size_hint();
2593-
if lower_bound > 0 {
2594-
self.drain.move_tail(lower_bound);
2591+
let optimistic_collect_count = self.replace_with.optimistic_collect_count();
2592+
if optimistic_collect_count > 0 {
2593+
self.drain.move_tail(optimistic_collect_count);
25952594
if !self.drain.fill(&mut self.replace_with) {
25962595
return
25972596
}

src/libcore/benches/iter.rs

+5
Original file line numberDiff line numberDiff line change
@@ -306,3 +306,8 @@ fn bench_skip_then_zip(b: &mut Bencher) {
306306
assert_eq!(s, 2009900);
307307
});
308308
}
309+
310+
#[bench]
311+
fn bench_collect_to_result(b: &mut Bencher) {
312+
b.iter(|| (0..100).map(|e| Ok(e)).collect::<Result<Vec<usize>, ()>>())
313+
}

src/libcore/iter/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,8 @@ pub use self::traits::{ExactSizeIterator, Sum, Product};
337337
pub use self::traits::FusedIterator;
338338
#[unstable(feature = "trusted_len", issue = "37572")]
339339
pub use self::traits::TrustedLen;
340+
#[unstable(feature = "optimistic_collect", issue = "00000")]
341+
pub use self::traits::OptimisticCollect;
340342

341343
#[stable(feature = "rust1", since = "1.0.0")]
342344
pub use self::adapters::{Rev, Cycle, Chain, Zip, Map, Filter, FilterMap, Enumerate};

src/libcore/iter/traits/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@ mod exact_size;
44
mod collect;
55
mod accum;
66
mod marker;
7+
mod optimistic_collect;
78

89
pub use self::iterator::Iterator;
910
pub use self::double_ended::DoubleEndedIterator;
1011
pub use self::exact_size::ExactSizeIterator;
1112
pub use self::collect::{FromIterator, IntoIterator, Extend};
1213
pub use self::accum::{Sum, Product};
1314
pub use self::marker::{FusedIterator, TrustedLen};
15+
pub use self::optimistic_collect::OptimisticCollect;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/// A specialized trait designed to improve the estimates used when preallocating collections in
2+
/// cases where `size_hint` is too conservative. For instance, when collecting into an `Option` or a
3+
/// `Result`, the most common outcome is a non-empty collection, but the protocol allows `size_hint`
4+
/// to only provide a lower bound of `0`. `OptimisticCollect` can be specialized for such cases in
5+
/// order to optimize the creation of the resulting collections without breaking `Iterator` rules.
6+
#[unstable(feature = "optimistic_collect", issue = "00000")]
7+
pub trait OptimisticCollect: Iterator {
8+
/// Provides an estimate of the size of the iterator for the purposes of preallocating
9+
/// collections that can be built from it. By default it provides the lower bound of
10+
/// `size_hint`.
11+
fn optimistic_collect_count(&self) -> usize;
12+
}
13+
14+
#[unstable(feature = "optimistic_collect", issue = "00000")]
15+
impl<I: Iterator> OptimisticCollect for I {
16+
default fn optimistic_collect_count(&self) -> usize { self.size_hint().0 }
17+
}
18+
19+
#[unstable(feature = "optimistic_collect", issue = "00000")]
20+
impl<I: Iterator> OptimisticCollect for &mut I {
21+
default fn optimistic_collect_count(&self) -> usize { (**self).size_hint().0 }
22+
}

src/libcore/option.rs

+12-1
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@
135135
136136
#![stable(feature = "rust1", since = "1.0.0")]
137137

138-
use iter::{FromIterator, FusedIterator, TrustedLen};
138+
use iter::{FromIterator, FusedIterator, TrustedLen, OptimisticCollect};
139139
use {hint, mem, ops::{self, Deref}};
140140
use pin::Pin;
141141

@@ -1356,6 +1356,17 @@ impl<A, V: FromIterator<A>> FromIterator<Option<A>> for Option<V> {
13561356
}
13571357
}
13581358

1359+
impl<T, Iter: Iterator<Item = Option<T>>> OptimisticCollect for Adapter<Iter> {
1360+
#[inline]
1361+
fn optimistic_collect_count(&self) -> usize {
1362+
if self.found_none {
1363+
0
1364+
} else {
1365+
self.iter.optimistic_collect_count()
1366+
}
1367+
}
1368+
}
1369+
13591370
let mut adapter = Adapter { iter: iter.into_iter(), found_none: false };
13601371
let v: V = FromIterator::from_iter(adapter.by_ref());
13611372

src/libcore/result.rs

+12-1
Original file line numberDiff line numberDiff line change
@@ -231,7 +231,7 @@
231231
#![stable(feature = "rust1", since = "1.0.0")]
232232

233233
use fmt;
234-
use iter::{FromIterator, FusedIterator, TrustedLen};
234+
use iter::{FromIterator, FusedIterator, TrustedLen, OptimisticCollect};
235235
use ops::{self, Deref};
236236

237237
/// `Result` is a type that represents either success ([`Ok`]) or failure ([`Err`]).
@@ -1233,6 +1233,17 @@ impl<A, E, V: FromIterator<A>> FromIterator<Result<A, E>> for Result<V, E> {
12331233
}
12341234
}
12351235

1236+
impl<T, E, Iter: Iterator<Item = Result<T, E>>> OptimisticCollect for Adapter<Iter, E> {
1237+
#[inline]
1238+
fn optimistic_collect_count(&self) -> usize {
1239+
if self.err.is_some() {
1240+
0
1241+
} else {
1242+
self.iter.optimistic_collect_count()
1243+
}
1244+
}
1245+
}
1246+
12361247
let mut adapter = Adapter { iter: iter.into_iter(), err: None };
12371248
let v: V = FromIterator::from_iter(adapter.by_ref());
12381249

src/libstd/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@
297297
#![feature(non_exhaustive)]
298298
#![feature(alloc_layout_extra)]
299299
#![feature(maybe_uninit)]
300+
#![feature(optimistic_collect)]
300301
#![cfg_attr(all(target_vendor = "fortanix", target_env = "sgx"),
301302
feature(global_asm, range_contains, slice_index_methods,
302303
decl_macro, coerce_unsized, sgx_platform, ptr_wrapping_offset_from))]

src/libstd/sys_common/wtf8.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use borrow::Cow;
2121
use char;
2222
use fmt;
2323
use hash::{Hash, Hasher};
24-
use iter::FromIterator;
24+
use iter::{FromIterator, OptimisticCollect};
2525
use mem;
2626
use ops;
2727
use rc::Rc;
@@ -385,9 +385,8 @@ impl FromIterator<CodePoint> for Wtf8Buf {
385385
impl Extend<CodePoint> for Wtf8Buf {
386386
fn extend<T: IntoIterator<Item=CodePoint>>(&mut self, iter: T) {
387387
let iterator = iter.into_iter();
388-
let (low, _high) = iterator.size_hint();
389388
// Lower bound of one byte per code point (ASCII only)
390-
self.bytes.reserve(low);
389+
self.bytes.reserve(iterator.optimistic_collect_count());
391390
for code_point in iterator {
392391
self.push(code_point);
393392
}

0 commit comments

Comments
 (0)