Skip to content

Commit 4ecfdfa

Browse files
committed
Auto merge of #100214 - scottmcm:strict-range, r=thomcc
Optimize `array::IntoIter`. `.into_iter()` on arrays was slower than it needed to be (especially compared to slice iterator) since it uses `Range<usize>`, which needs to handle degenerate ranges like `10..4`. This PR adds an internal `IndexRange` type that's like `Range<usize>` but with a safety invariant that means it doesn't need to worry about those cases -- it only handles `start <= end` -- and thus can give LLVM more information to optimize better. I added one simple demonstration of the improvement as a codegen test. (`vec::IntoIter` uses pointers instead of indexes, so doesn't have this problem, but that only works because its elements are boxed. `array::IntoIter` can't use pointers because that would keep it from being movable.)
2 parents 7743aa8 + 6dbd9a2 commit 4ecfdfa

File tree

6 files changed

+282
-31
lines changed

6 files changed

+282
-31
lines changed

library/core/src/array/iter.rs

+23-31
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
//! Defines the `IntoIter` owned iterator for arrays.
22
33
use crate::{
4-
cmp, fmt,
4+
fmt,
55
iter::{self, ExactSizeIterator, FusedIterator, TrustedLen},
66
mem::{self, MaybeUninit},
7-
ops::Range,
7+
ops::{IndexRange, Range},
88
ptr,
99
};
1010

@@ -29,9 +29,10 @@ pub struct IntoIter<T, const N: usize> {
2929
/// The elements in `data` that have not been yielded yet.
3030
///
3131
/// Invariants:
32-
/// - `alive.start <= alive.end`
3332
/// - `alive.end <= N`
34-
alive: Range<usize>,
33+
///
34+
/// (And the `IndexRange` type requires `alive.start <= alive.end`.)
35+
alive: IndexRange,
3536
}
3637

3738
// Note: the `#[rustc_skip_array_during_method_dispatch]` on `trait IntoIterator`
@@ -69,7 +70,7 @@ impl<T, const N: usize> IntoIterator for [T; N] {
6970
// Until then, we can use `mem::transmute_copy` to create a bitwise copy
7071
// as a different type, then forget `array` so that it is not dropped.
7172
unsafe {
72-
let iter = IntoIter { data: mem::transmute_copy(&self), alive: 0..N };
73+
let iter = IntoIter { data: mem::transmute_copy(&self), alive: IndexRange::zero_to(N) };
7374
mem::forget(self);
7475
iter
7576
}
@@ -147,7 +148,9 @@ impl<T, const N: usize> IntoIter<T, N> {
147148
buffer: [MaybeUninit<T>; N],
148149
initialized: Range<usize>,
149150
) -> Self {
150-
Self { data: buffer, alive: initialized }
151+
// SAFETY: one of our safety conditions is that the range is canonical.
152+
let alive = unsafe { IndexRange::new_unchecked(initialized.start, initialized.end) };
153+
Self { data: buffer, alive }
151154
}
152155

153156
/// Creates an iterator over `T` which returns no elements.
@@ -283,24 +286,19 @@ impl<T, const N: usize> Iterator for IntoIter<T, N> {
283286
}
284287

285288
fn advance_by(&mut self, n: usize) -> Result<(), usize> {
286-
let len = self.len();
287-
288-
// The number of elements to drop. Always in-bounds by construction.
289-
let delta = cmp::min(n, len);
290-
291-
let range_to_drop = self.alive.start..(self.alive.start + delta);
289+
let original_len = self.len();
292290

293-
// Moving the start marks them as conceptually "dropped", so if anything
294-
// goes bad then our drop impl won't double-free them.
295-
self.alive.start += delta;
291+
// This also moves the start, which marks them as conceptually "dropped",
292+
// so if anything goes bad then our drop impl won't double-free them.
293+
let range_to_drop = self.alive.take_prefix(n);
296294

297295
// SAFETY: These elements are currently initialized, so it's fine to drop them.
298296
unsafe {
299297
let slice = self.data.get_unchecked_mut(range_to_drop);
300298
ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(slice));
301299
}
302300

303-
if n > len { Err(len) } else { Ok(()) }
301+
if n > original_len { Err(original_len) } else { Ok(()) }
304302
}
305303
}
306304

@@ -338,24 +336,19 @@ impl<T, const N: usize> DoubleEndedIterator for IntoIter<T, N> {
338336
}
339337

340338
fn advance_back_by(&mut self, n: usize) -> Result<(), usize> {
341-
let len = self.len();
342-
343-
// The number of elements to drop. Always in-bounds by construction.
344-
let delta = cmp::min(n, len);
345-
346-
let range_to_drop = (self.alive.end - delta)..self.alive.end;
339+
let original_len = self.len();
347340

348-
// Moving the end marks them as conceptually "dropped", so if anything
349-
// goes bad then our drop impl won't double-free them.
350-
self.alive.end -= delta;
341+
// This also moves the end, which marks them as conceptually "dropped",
342+
// so if anything goes bad then our drop impl won't double-free them.
343+
let range_to_drop = self.alive.take_suffix(n);
351344

352345
// SAFETY: These elements are currently initialized, so it's fine to drop them.
353346
unsafe {
354347
let slice = self.data.get_unchecked_mut(range_to_drop);
355348
ptr::drop_in_place(MaybeUninit::slice_assume_init_mut(slice));
356349
}
357350

358-
if n > len { Err(len) } else { Ok(()) }
351+
if n > original_len { Err(original_len) } else { Ok(()) }
359352
}
360353
}
361354

@@ -372,9 +365,7 @@ impl<T, const N: usize> Drop for IntoIter<T, N> {
372365
#[stable(feature = "array_value_iter_impls", since = "1.40.0")]
373366
impl<T, const N: usize> ExactSizeIterator for IntoIter<T, N> {
374367
fn len(&self) -> usize {
375-
// Will never underflow due to the invariant `alive.start <=
376-
// alive.end`.
377-
self.alive.end - self.alive.start
368+
self.alive.len()
378369
}
379370
fn is_empty(&self) -> bool {
380371
self.alive.is_empty()
@@ -396,14 +387,15 @@ impl<T: Clone, const N: usize> Clone for IntoIter<T, N> {
396387
fn clone(&self) -> Self {
397388
// Note, we don't really need to match the exact same alive range, so
398389
// we can just clone into offset 0 regardless of where `self` is.
399-
let mut new = Self { data: MaybeUninit::uninit_array(), alive: 0..0 };
390+
let mut new = Self { data: MaybeUninit::uninit_array(), alive: IndexRange::zero_to(0) };
400391

401392
// Clone all alive elements.
402393
for (src, dst) in iter::zip(self.as_slice(), &mut new.data) {
403394
// Write a clone into the new array, then update its alive range.
404395
// If cloning panics, we'll correctly drop the previous items.
405396
dst.write(src.clone());
406-
new.alive.end += 1;
397+
// This addition cannot overflow as we're iterating a slice
398+
new.alive = IndexRange::zero_to(new.alive.end() + 1);
407399
}
408400

409401
new

library/core/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
#![feature(const_fmt_arguments_new)]
115115
#![feature(const_heap)]
116116
#![feature(const_convert)]
117+
#![feature(const_index_range_slice_index)]
117118
#![feature(const_inherent_unchecked_arith)]
118119
#![feature(const_int_unchecked_arith)]
119120
#![feature(const_intrinsic_forget)]

library/core/src/ops/index_range.rs

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
use crate::intrinsics::{assert_unsafe_precondition, unchecked_add, unchecked_sub};
2+
use crate::iter::{FusedIterator, TrustedLen};
3+
4+
/// Like a `Range<usize>`, but with a safety invariant that `start <= end`.
5+
///
6+
/// This means that `end - start` cannot overflow, allowing some μoptimizations.
7+
///
8+
/// (Normal `Range` code needs to handle degenerate ranges like `10..0`,
9+
/// which takes extra checks compared to only handling the canonical form.)
10+
#[derive(Clone, Debug, PartialEq, Eq)]
11+
pub(crate) struct IndexRange {
12+
start: usize,
13+
end: usize,
14+
}
15+
16+
impl IndexRange {
17+
/// # Safety
18+
/// - `start <= end`
19+
#[inline]
20+
pub const unsafe fn new_unchecked(start: usize, end: usize) -> Self {
21+
// SAFETY: comparisons on usize are pure
22+
unsafe { assert_unsafe_precondition!((start: usize, end: usize) => start <= end) };
23+
IndexRange { start, end }
24+
}
25+
26+
#[inline]
27+
pub const fn zero_to(end: usize) -> Self {
28+
IndexRange { start: 0, end }
29+
}
30+
31+
#[inline]
32+
pub const fn start(&self) -> usize {
33+
self.start
34+
}
35+
36+
#[inline]
37+
pub const fn end(&self) -> usize {
38+
self.end
39+
}
40+
41+
#[inline]
42+
pub const fn len(&self) -> usize {
43+
// SAFETY: By invariant, this cannot wrap
44+
unsafe { unchecked_sub(self.end, self.start) }
45+
}
46+
47+
/// # Safety
48+
/// - Can only be called when `start < end`, aka when `len > 0`.
49+
#[inline]
50+
unsafe fn next_unchecked(&mut self) -> usize {
51+
debug_assert!(self.start < self.end);
52+
53+
let value = self.start;
54+
// SAFETY: The range isn't empty, so this cannot overflow
55+
self.start = unsafe { unchecked_add(value, 1) };
56+
value
57+
}
58+
59+
/// # Safety
60+
/// - Can only be called when `start < end`, aka when `len > 0`.
61+
#[inline]
62+
unsafe fn next_back_unchecked(&mut self) -> usize {
63+
debug_assert!(self.start < self.end);
64+
65+
// SAFETY: The range isn't empty, so this cannot overflow
66+
let value = unsafe { unchecked_sub(self.end, 1) };
67+
self.end = value;
68+
value
69+
}
70+
71+
/// Removes the first `n` items from this range, returning them as an `IndexRange`.
72+
/// If there are fewer than `n`, then the whole range is returned and
73+
/// `self` is left empty.
74+
///
75+
/// This is designed to help implement `Iterator::advance_by`.
76+
#[inline]
77+
pub fn take_prefix(&mut self, n: usize) -> Self {
78+
let mid = if n <= self.len() {
79+
// SAFETY: We just checked that this will be between start and end,
80+
// and thus the addition cannot overflow.
81+
unsafe { unchecked_add(self.start, n) }
82+
} else {
83+
self.end
84+
};
85+
let prefix = Self { start: self.start, end: mid };
86+
self.start = mid;
87+
prefix
88+
}
89+
90+
/// Removes the last `n` items from this range, returning them as an `IndexRange`.
91+
/// If there are fewer than `n`, then the whole range is returned and
92+
/// `self` is left empty.
93+
///
94+
/// This is designed to help implement `Iterator::advance_back_by`.
95+
#[inline]
96+
pub fn take_suffix(&mut self, n: usize) -> Self {
97+
let mid = if n <= self.len() {
98+
// SAFETY: We just checked that this will be between start and end,
99+
// and thus the subtraction cannot underflow.
100+
unsafe { unchecked_sub(self.end, n) }
101+
} else {
102+
self.start
103+
};
104+
let suffix = Self { start: mid, end: self.end };
105+
self.end = mid;
106+
suffix
107+
}
108+
}
109+
110+
impl Iterator for IndexRange {
111+
type Item = usize;
112+
113+
#[inline]
114+
fn next(&mut self) -> Option<usize> {
115+
if self.len() > 0 {
116+
// SAFETY: We just checked that the range is non-empty
117+
unsafe { Some(self.next_unchecked()) }
118+
} else {
119+
None
120+
}
121+
}
122+
123+
#[inline]
124+
fn size_hint(&self) -> (usize, Option<usize>) {
125+
let len = self.len();
126+
(len, Some(len))
127+
}
128+
129+
#[inline]
130+
fn advance_by(&mut self, n: usize) -> Result<(), usize> {
131+
let original_len = self.len();
132+
self.take_prefix(n);
133+
if n > original_len { Err(original_len) } else { Ok(()) }
134+
}
135+
}
136+
137+
impl DoubleEndedIterator for IndexRange {
138+
#[inline]
139+
fn next_back(&mut self) -> Option<usize> {
140+
if self.len() > 0 {
141+
// SAFETY: We just checked that the range is non-empty
142+
unsafe { Some(self.next_back_unchecked()) }
143+
} else {
144+
None
145+
}
146+
}
147+
148+
#[inline]
149+
fn advance_back_by(&mut self, n: usize) -> Result<(), usize> {
150+
let original_len = self.len();
151+
self.take_suffix(n);
152+
if n > original_len { Err(original_len) } else { Ok(()) }
153+
}
154+
}
155+
156+
impl ExactSizeIterator for IndexRange {
157+
#[inline]
158+
fn len(&self) -> usize {
159+
self.len()
160+
}
161+
}
162+
163+
// SAFETY: Because we only deal in `usize`, our `len` is always perfect.
164+
unsafe impl TrustedLen for IndexRange {}
165+
166+
impl FusedIterator for IndexRange {}

library/core/src/ops/mod.rs

+3
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ mod drop;
146146
mod function;
147147
mod generator;
148148
mod index;
149+
mod index_range;
149150
mod range;
150151
mod try_trait;
151152
mod unsize;
@@ -178,6 +179,8 @@ pub use self::index::{Index, IndexMut};
178179
#[stable(feature = "rust1", since = "1.0.0")]
179180
pub use self::range::{Range, RangeFrom, RangeFull, RangeTo};
180181

182+
pub(crate) use self::index_range::IndexRange;
183+
181184
#[stable(feature = "inclusive_range", since = "1.26.0")]
182185
pub use self::range::{Bound, RangeBounds, RangeInclusive, RangeToInclusive};
183186

0 commit comments

Comments
 (0)