Skip to content

Commit 9075d4c

Browse files
committed
Make slice iterators carry only a single provenance
Today they carry two, which makes certain optimizations illegal at the LLVM-IR level. In particular, it makes it matter whether an operation is done from the start pointer or the end pointer, since as far as LLVM knows those might have different provenance. For example, this code ```rust pub unsafe fn first_via_nth_back(mut it: std::slice::Iter<'_, i8>) -> &i8 { // CHECK: ret ptr %0 let len = it.len(); it.nth_back(len - 1).unwrap_unchecked() } ``` is <https://rust.godbolt.org/z/8e61vqzhP> ```llvm %2 = ptrtoint ptr %1 to i64 %3 = ptrtoint ptr %0 to i64 %.neg = add i64 %3, 1 %_6.neg = sub i64 %.neg, %2 %_15.i6.i = getelementptr inbounds i8, ptr %1, i64 %_6.neg %_15.i.i = getelementptr inbounds i8, ptr %_15.i6.i, i64 -1 ret ptr %_15.i.i ``` whereas after this PR it's just ```llvm ret ptr %0 ``` (some `assume`s removed in both cases)
1 parent e50ab29 commit 9075d4c

13 files changed

+274
-222
lines changed

library/core/src/slice/iter.rs

+30-14
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@ mod macros;
66
use crate::cmp;
77
use crate::fmt;
88
use crate::hint::assert_unchecked;
9+
use crate::intrinsics;
910
use crate::iter::{
1011
FusedIterator, TrustedLen, TrustedRandomAccess, TrustedRandomAccessNoCoerce, UncheckedIterator,
1112
};
1213
use crate::marker::PhantomData;
1314
use crate::mem::{self, SizedTypeProperties};
1415
use crate::num::NonZero;
15-
use crate::ptr::{self, without_provenance, without_provenance_mut, NonNull};
16+
use crate::ptr::{self, NonNull};
1617

1718
use super::{from_raw_parts, from_raw_parts_mut};
1819

@@ -65,10 +66,14 @@ pub struct Iter<'a, T: 'a> {
6566
///
6667
/// This address will be used for all ZST elements, never changed.
6768
ptr: NonNull<T>,
68-
/// For non-ZSTs, the non-null pointer to the past-the-end element.
69+
/// For non-ZSTs, the address of the past-the-end element. This is
70+
/// intentionally *not* a pointer, so that it doesn't carry provenance.
71+
/// If you're turning this into a pointer, you need to use the provenance from
72+
/// `ptr` instead. (If this carried provenance, the compiler wouldn't know
73+
/// that reads from the start and the end are actually the same provenance.)
6974
///
70-
/// For ZSTs, this is `ptr::dangling(len)`.
71-
end_or_len: *const T,
75+
/// For ZSTs, this is the length.
76+
end_addr_or_len: usize,
7277
_marker: PhantomData<&'a T>,
7378
}
7479

@@ -91,10 +96,9 @@ impl<'a, T> Iter<'a, T> {
9196
let ptr: NonNull<T> = NonNull::from(slice).cast();
9297
// SAFETY: Similar to `IterMut::new`.
9398
unsafe {
94-
let end_or_len =
95-
if T::IS_ZST { without_provenance(len) } else { ptr.as_ptr().add(len) };
99+
let end_addr_or_len = if T::IS_ZST { len } else { addr_usize(ptr.add(len)) };
96100

97-
Self { ptr, end_or_len, _marker: PhantomData }
101+
Self { ptr, end_addr_or_len, _marker: PhantomData }
98102
}
99103
}
100104

@@ -144,7 +148,7 @@ iterator! {struct Iter -> *const T, &'a T, const, {/* no mut */}, as_ref, {
144148
impl<T> Clone for Iter<'_, T> {
145149
#[inline]
146150
fn clone(&self) -> Self {
147-
Iter { ptr: self.ptr, end_or_len: self.end_or_len, _marker: self._marker }
151+
Iter { ptr: self.ptr, end_addr_or_len: self.end_addr_or_len, _marker: self._marker }
148152
}
149153
}
150154

@@ -188,10 +192,14 @@ pub struct IterMut<'a, T: 'a> {
188192
///
189193
/// This address will be used for all ZST elements, never changed.
190194
ptr: NonNull<T>,
191-
/// For non-ZSTs, the non-null pointer to the past-the-end element.
195+
/// For non-ZSTs, the address of the past-the-end element. This is
196+
/// intentionally *not* a pointer, so that it doesn't carry provenance.
197+
/// If you're turning this into a pointer, you need to use the provenance from
198+
/// `ptr` instead. (If this carried provenance, the compiler wouldn't know
199+
/// that reads from the start and the end are actually the same provenance.)
192200
///
193-
/// For ZSTs, this is `ptr::without_provenance_mut(len)`.
194-
end_or_len: *mut T,
201+
/// For ZSTs, this is the length.
202+
end_addr_or_len: usize,
195203
_marker: PhantomData<&'a mut T>,
196204
}
197205

@@ -229,10 +237,9 @@ impl<'a, T> IterMut<'a, T> {
229237
// See the `next_unchecked!` and `is_empty!` macros as well as the
230238
// `post_inc_start` method for more information.
231239
unsafe {
232-
let end_or_len =
233-
if T::IS_ZST { without_provenance_mut(len) } else { ptr.as_ptr().add(len) };
240+
let end_addr_or_len = if T::IS_ZST { len } else { addr_usize(ptr.add(len)) };
234241

235-
Self { ptr, end_or_len, _marker: PhantomData }
242+
Self { ptr, end_addr_or_len, _marker: PhantomData }
236243
}
237244
}
238245

@@ -3423,3 +3430,12 @@ impl<'a, T: 'a + fmt::Debug, P> fmt::Debug for ChunkByMut<'a, T, P> {
34233430
f.debug_struct("ChunkByMut").field("slice", &self.slice).finish()
34243431
}
34253432
}
3433+
3434+
/// Same as `p.addr().get()`, but faster to compile by avoiding a bunch of
3435+
/// intermediate steps and unneeded UB checks, which also inlines better.
3436+
#[inline]
3437+
fn addr_usize<T>(p: NonNull<T>) -> usize {
3438+
// SAFETY: `NonNull` for a sized type has the same layout as `usize`,
3439+
// and we intentionally don't want to expose here.
3440+
unsafe { intrinsics::transmute(p) }
3441+
}

library/core/src/slice/iter/macros.rs

+41-16
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,60 @@
11
//! Macros used by iterators of slice.
22
3-
/// Convenience & performance macro for consuming the `end_or_len` field, by
3+
/// Convenience macro for updating the `end_addr_or_len` field for non-ZSTs.
4+
macro_rules! set_end {
5+
($this:ident . end = $new_end:expr) => {{
6+
$this.end_addr_or_len = addr_usize($new_end);
7+
}};
8+
}
9+
10+
/// Convenience & performance macro for consuming the `end_addr_or_len` field, by
411
/// giving a `(&mut) usize` or `(&mut) NonNull<T>` depending whether `T` is
512
/// or is not a ZST respectively.
613
///
7-
/// Internally, this reads the `end` through a pointer-to-`NonNull` so that
8-
/// it'll get the appropriate non-null metadata in the backend without needing
9-
/// to call `assume` manually.
14+
/// When giving a `NonNull<T>` for the end, it creates it by offsetting from the
15+
/// `ptr` so that the backend knows that both pointers have the same provenance.
1016
macro_rules! if_zst {
1117
(mut $this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
1218
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block
1319

1420
if T::IS_ZST {
15-
// SAFETY: for ZSTs, the pointer is storing a provenance-free length,
16-
// so consuming and updating it as a `usize` is fine.
17-
let $len = unsafe { &mut *ptr::addr_of_mut!($this.end_or_len).cast::<usize>() };
21+
let $len = &mut $this.end_addr_or_len;
1822
$zst_body
1923
} else {
20-
// SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
21-
let $end = unsafe { &mut *ptr::addr_of_mut!($this.end_or_len).cast::<NonNull<T>>() };
24+
// SAFETY: By type invariant `end >= ptr`, and thus the subtraction
25+
// cannot overflow, and the iter represents a single allocated
26+
// object so the `add` will also be in-range.
27+
let $end = unsafe {
28+
let ptr_addr = addr_usize($this.ptr);
29+
// Need to load as `NonZero` to get `!range` metadata
30+
let end_addr: NonZero<usize> = *ptr::addr_of!($this.end_addr_or_len).cast();
31+
// Not using `with_addr` because we have ordering information that
32+
// we can take advantage of here that `with_addr` cannot.
33+
let byte_diff = intrinsics::unchecked_sub(end_addr.get(), ptr_addr);
34+
$this.ptr.byte_add(byte_diff)
35+
};
2236
$other_body
2337
}
2438
}};
2539
($this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
2640
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block
2741

2842
if T::IS_ZST {
29-
let $len = $this.end_or_len.addr();
43+
let $len = $this.end_addr_or_len;
3044
$zst_body
3145
} else {
32-
// SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
33-
let $end = unsafe { *ptr::addr_of!($this.end_or_len).cast::<NonNull<T>>() };
46+
// SAFETY: By type invariant `end >= ptr`, and thus the subtraction
47+
// cannot overflow, and the iter represents a single allocated
48+
// object so the `add` will also be in-range.
49+
let $end = unsafe {
50+
let ptr_addr = addr_usize($this.ptr);
51+
// Need to load as `NonZero` to get `!range` metadata
52+
let end_addr: NonZero<usize> = *ptr::addr_of!($this.end_addr_or_len).cast();
53+
// Not using `with_addr` because we have ordering information that
54+
// we can take advantage of here that `with_addr` cannot.
55+
let byte_diff = intrinsics::unchecked_sub(end_addr.get(), ptr_addr);
56+
$this.ptr.byte_add(byte_diff)
57+
};
3458
$other_body
3559
}
3660
}};
@@ -128,8 +152,9 @@ macro_rules! iterator {
128152
// which is guaranteed to not overflow an `isize`. Also, the resulting pointer
129153
// is in bounds of `slice`, which fulfills the other requirements for `offset`.
130154
end => unsafe {
131-
*end = end.sub(offset);
132-
*end
155+
let new_end = end.sub(offset);
156+
set_end!(self.end = new_end);
157+
new_end
133158
},
134159
)
135160
}
@@ -184,7 +209,7 @@ macro_rules! iterator {
184209
// This iterator is now empty.
185210
if_zst!(mut self,
186211
len => *len = 0,
187-
end => self.ptr = *end,
212+
end => self.ptr = end,
188213
);
189214
return None;
190215
}
@@ -409,7 +434,7 @@ macro_rules! iterator {
409434
// This iterator is now empty.
410435
if_zst!(mut self,
411436
len => *len = 0,
412-
end => *end = self.ptr,
437+
_end => set_end!(self.end = self.ptr),
413438
);
414439
return None;
415440
}

tests/codegen/issues/issue-37945.rs

+16-15
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,33 @@
33

44
// Check that LLVM understands that `Iter` pointer is not null. Issue #37945.
55

6+
// There used to be a comparison against `null`, so we check that it's not there
7+
// and that the appropriate parameter metadata is.
8+
69
#![crate_type = "lib"]
710

811
use std::slice::Iter;
912

1013
#[no_mangle]
1114
pub fn is_empty_1(xs: Iter<f32>) -> bool {
12-
// CHECK-LABEL: @is_empty_1(
13-
// CHECK-NEXT: start:
14-
// CHECK-NEXT: [[A:%.*]] = icmp ne ptr {{%xs.0|%xs.1}}, null
15-
// CHECK-NEXT: tail call void @llvm.assume(i1 [[A]])
16-
// The order between %xs.0 and %xs.1 on the next line doesn't matter
17-
// and different LLVM versions produce different order.
18-
// CHECK-NEXT: [[B:%.*]] = icmp eq ptr {{%xs.0, %xs.1|%xs.1, %xs.0}}
19-
// CHECK-NEXT: ret i1 [[B:%.*]]
15+
// CHECK-LABEL: @is_empty_1
16+
// CHECK-SAME: (ptr noundef nonnull %xs.0,
17+
18+
// CHECK-NOT: null
19+
// CHECK-NOT: i32 0
20+
// CHECK-NOT: i64 0
21+
2022
{xs}.next().is_none()
2123
}
2224

2325
#[no_mangle]
2426
pub fn is_empty_2(xs: Iter<f32>) -> bool {
2527
// CHECK-LABEL: @is_empty_2
26-
// CHECK-NEXT: start:
27-
// CHECK-NEXT: [[C:%.*]] = icmp ne ptr {{%xs.0|%xs.1}}, null
28-
// CHECK-NEXT: tail call void @llvm.assume(i1 [[C]])
29-
// The order between %xs.0 and %xs.1 on the next line doesn't matter
30-
// and different LLVM versions produce different order.
31-
// CHECK-NEXT: [[D:%.*]] = icmp eq ptr {{%xs.0, %xs.1|%xs.1, %xs.0}}
32-
// CHECK-NEXT: ret i1 [[D:%.*]]
28+
// CHECK-SAME: (ptr noundef nonnull %xs.0,
29+
30+
// CHECK-NOT: null
31+
// CHECK-NOT: i32 0
32+
// CHECK-NOT: i64 0
33+
3334
xs.map(|&x| x).next().is_none()
3435
}

tests/codegen/slice-iter-len-eq-zero.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ type Demo = [u8; 3];
77
#[no_mangle]
88
pub fn slice_iter_len_eq_zero(y: std::slice::Iter<'_, Demo>) -> bool {
99
// CHECK-NOT: sub
10-
// CHECK: %[[RET:.+]] = icmp eq ptr {{%1|%0}}, {{%1|%0}}
10+
// CHECK: %[[ADDR:.+]] = ptrtoint ptr %0 to [[USIZE:i[0-9]+]]
11+
// CHECK: %[[RET:.+]] = icmp eq [[USIZE]] %[[ADDR]], %1
1112
// CHECK: ret i1 %[[RET]]
1213
y.len() == 0
1314
}

tests/codegen/slice-iter-nonnull.rs

+39-29
Original file line numberDiff line numberDiff line change
@@ -10,36 +10,41 @@
1010
// needed as the code changed to read it as a `NonNull`, and thus gets the
1111
// appropriate `!nonnull` annotations naturally.
1212

13+
// Well, now that the end is stored without provenance, the end pointer gets a
14+
// `!range` annotation instead of a `!nonnull` one.
15+
1316
// CHECK-LABEL: @slice_iter_next(
1417
#[no_mangle]
1518
pub fn slice_iter_next<'a>(it: &mut std::slice::Iter<'a, u32>) -> Option<&'a u32> {
16-
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
17-
// CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
18-
// CHECK-SAME: !nonnull
19-
// CHECK-SAME: !noundef
2019
// CHECK: %[[START:.+]] = load ptr, ptr %it,
2120
// CHECK-SAME: !nonnull
2221
// CHECK-SAME: !noundef
23-
// CHECK: icmp eq ptr %[[START]], %[[END]]
22+
// CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE:i32|i64]]
23+
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
24+
// CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
25+
// CHECK-SAME: !range ![[NONZERO_META:[0-9]+]]
26+
// CHECK-SAME: !noundef
27+
// CHECK: icmp eq [[USIZE]] %[[END_ADDR]], %[[START_ADDR]]
2428

25-
// CHECK: store ptr{{.+}}, ptr %it,
29+
// CHECK: store ptr {{.+}}, ptr %it,
2630

2731
it.next()
2832
}
2933

3034
// CHECK-LABEL: @slice_iter_next_back(
3135
#[no_mangle]
3236
pub fn slice_iter_next_back<'a>(it: &mut std::slice::Iter<'a, u32>) -> Option<&'a u32> {
33-
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
34-
// CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
35-
// CHECK-SAME: !nonnull
36-
// CHECK-SAME: !noundef
3737
// CHECK: %[[START:.+]] = load ptr, ptr %it,
3838
// CHECK-SAME: !nonnull
3939
// CHECK-SAME: !noundef
40-
// CHECK: icmp eq ptr %[[START]], %[[END]]
40+
// CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE]]
41+
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
42+
// CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
43+
// CHECK-SAME: !range ![[NONZERO_META]]
44+
// CHECK-SAME: !noundef
45+
// CHECK: icmp eq [[USIZE]] %[[END_ADDR]], %[[START_ADDR]]
4146

42-
// CHECK: store ptr{{.+}}, ptr %[[ENDP]],
47+
// CHECK: store [[USIZE]] {{.+}}, ptr %[[ENDP]],
4348

4449
it.next_back()
4550
}
@@ -54,11 +59,12 @@ pub fn slice_iter_next_back<'a>(it: &mut std::slice::Iter<'a, u32>) -> Option<&'
5459
#[no_mangle]
5560
pub fn slice_iter_new(slice: &[u32]) -> std::slice::Iter<'_, u32> {
5661
// CHECK-NOT: slice
57-
// CHECK: %[[END:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
62+
// CHECK: %[[END_PTR:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
63+
// CHECK: %[[END_ADDR:.+]] = ptrtoint ptr %[[END_PTR]] to [[USIZE]]
5864
// CHECK-NOT: slice
5965
// CHECK: insertvalue {{.+}} ptr %slice.0, 0
6066
// CHECK-NOT: slice
61-
// CHECK: insertvalue {{.+}} ptr %[[END]], 1
67+
// CHECK: insertvalue {{.+}} [[USIZE]] %[[END_ADDR]], 1
6268
// CHECK-NOT: slice
6369
// CHECK: }
6470
slice.iter()
@@ -69,11 +75,12 @@ pub fn slice_iter_new(slice: &[u32]) -> std::slice::Iter<'_, u32> {
6975
#[no_mangle]
7076
pub fn slice_iter_mut_new(slice: &mut [u32]) -> std::slice::IterMut<'_, u32> {
7177
// CHECK-NOT: slice
72-
// CHECK: %[[END:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
78+
// CHECK: %[[END_PTR:.+]] = getelementptr inbounds i32{{.+}} %slice.0{{.+}} %slice.1
79+
// CHECK: %[[END_ADDR:.+]] = ptrtoint ptr %[[END_PTR]] to [[USIZE]]
7380
// CHECK-NOT: slice
7481
// CHECK: insertvalue {{.+}} ptr %slice.0, 0
7582
// CHECK-NOT: slice
76-
// CHECK: insertvalue {{.+}} ptr %[[END]], 1
83+
// CHECK: insertvalue {{.+}} [[USIZE]] %[[END_ADDR]], 1
7784
// CHECK-NOT: slice
7885
// CHECK: }
7986
slice.iter_mut()
@@ -82,33 +89,36 @@ pub fn slice_iter_mut_new(slice: &mut [u32]) -> std::slice::IterMut<'_, u32> {
8289
// CHECK-LABEL: @slice_iter_is_empty
8390
#[no_mangle]
8491
pub fn slice_iter_is_empty(it: &std::slice::Iter<'_, u32>) -> bool {
85-
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
86-
// CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
87-
// CHECK-SAME: !nonnull
88-
// CHECK-SAME: !noundef
8992
// CHECK: %[[START:.+]] = load ptr, ptr %it,
9093
// CHECK-SAME: !nonnull
9194
// CHECK-SAME: !noundef
95+
// CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE]]
96+
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
97+
// CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
98+
// CHECK-SAME: !range ![[NONZERO_META:[0-9]+]]
99+
// CHECK-SAME: !noundef
92100

93-
// CHECK: %[[RET:.+]] = icmp eq ptr %[[START]], %[[END]]
101+
// CHECK: %[[RET:.+]] = icmp eq i64 %[[END_ADDR]], %[[START_ADDR]]
94102
// CHECK: ret i1 %[[RET]]
95103
it.is_empty()
96104
}
97105

98106
// CHECK-LABEL: @slice_iter_len
99107
#[no_mangle]
100108
pub fn slice_iter_len(it: &std::slice::Iter<'_, u32>) -> usize {
101-
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
102-
// CHECK: %[[END:.+]] = load ptr, ptr %[[ENDP]]
103-
// CHECK-SAME: !nonnull
104-
// CHECK-SAME: !noundef
105109
// CHECK: %[[START:.+]] = load ptr, ptr %it,
106110
// CHECK-SAME: !nonnull
107111
// CHECK-SAME: !noundef
112+
// CHECK: %[[START_ADDR:.+]] = ptrtoint ptr %[[START]] to [[USIZE]]
113+
// CHECK: %[[ENDP:.+]] = getelementptr inbounds i8, ptr %it, {{i32 4|i64 8}}
114+
// CHECK: %[[END_ADDR:.+]] = load [[USIZE]], ptr %[[ENDP]]
115+
// CHECK-SAME: !range ![[NONZERO_META:[0-9]+]]
116+
// CHECK-SAME: !noundef
108117

109-
// CHECK: ptrtoint
110-
// CHECK: ptrtoint
111-
// CHECK: sub nuw
112-
// CHECK: lshr exact
118+
// CHECK: %[[BYTE_DIFF:.+]] = sub nuw [[USIZE]] %[[END_ADDR]], %[[START_ADDR]]
119+
// CHECK: %[[ELEM_DIFF:.+]] = lshr exact [[USIZE]] %[[BYTE_DIFF]], 2
120+
// CHECK: ret [[USIZE]] %[[ELEM_DIFF]]
113121
it.len()
114122
}
123+
124+
// CHECK: ![[NONZERO_META]] = !{[[USIZE]] 1, [[USIZE]] 0}

0 commit comments

Comments
 (0)