Skip to content

Commit e50ff9b

Browse files
committed
Auto merge of #95241 - Gankra:cleaned-provenance, r=workingjubilee
Strict Provenance MVP This patch series examines the question: how bad would it be if we adopted an extremely strict pointer provenance model that completely banished all int<->ptr casts. The key insight to making this approach even *vaguely* palatable is the ptr.with_addr(addr) -> ptr function, which takes a pointer and an address and creates a new pointer with that address and the provenance of the input pointer. In this way the "chain of custody" is completely and dynamically restored, making the model suitable even for dynamic checkers like CHERI and Miri. This is not a formal model, but lots of the docs discussing the model have been updated to try to convey the *concept* of this design in the hopes that it can be iterated on. See #95228
2 parents 05142a7 + e3a3afe commit e50ff9b

39 files changed

+621
-126
lines changed

compiler/rustc_arena/src/lib.rs

+17-9
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#![feature(decl_macro)]
1919
#![feature(rustc_attrs)]
2020
#![cfg_attr(test, feature(test))]
21+
#![feature(strict_provenance)]
2122

2223
use smallvec::SmallVec;
2324

@@ -87,7 +88,7 @@ impl<T> ArenaChunk<T> {
8788
unsafe {
8889
if mem::size_of::<T>() == 0 {
8990
// A pointer as large as possible for zero-sized elements.
90-
!0 as *mut T
91+
ptr::invalid_mut(!0)
9192
} else {
9293
self.start().add(self.storage.len())
9394
}
@@ -199,7 +200,7 @@ impl<T> TypedArena<T> {
199200
unsafe {
200201
if mem::size_of::<T>() == 0 {
201202
self.ptr.set((self.ptr.get() as *mut u8).wrapping_offset(1) as *mut T);
202-
let ptr = mem::align_of::<T>() as *mut T;
203+
let ptr = ptr::NonNull::<T>::dangling().as_ptr();
203204
// Don't drop the object. This `write` is equivalent to `forget`.
204205
ptr::write(ptr, object);
205206
&mut *ptr
@@ -216,7 +217,9 @@ impl<T> TypedArena<T> {
216217

217218
#[inline]
218219
fn can_allocate(&self, additional: usize) -> bool {
219-
let available_bytes = self.end.get() as usize - self.ptr.get() as usize;
220+
// FIXME: this should *likely* use `offset_from`, but more
221+
// investigation is needed (including running tests in miri).
222+
let available_bytes = self.end.get().addr() - self.ptr.get().addr();
220223
let additional_bytes = additional.checked_mul(mem::size_of::<T>()).unwrap();
221224
available_bytes >= additional_bytes
222225
}
@@ -262,7 +265,9 @@ impl<T> TypedArena<T> {
262265
// If a type is `!needs_drop`, we don't need to keep track of how many elements
263266
// the chunk stores - the field will be ignored anyway.
264267
if mem::needs_drop::<T>() {
265-
let used_bytes = self.ptr.get() as usize - last_chunk.start() as usize;
268+
// FIXME: this should *likely* use `offset_from`, but more
269+
// investigation is needed (including running tests in miri).
270+
let used_bytes = self.ptr.get().addr() - last_chunk.start().addr();
266271
last_chunk.entries = used_bytes / mem::size_of::<T>();
267272
}
268273

@@ -288,9 +293,9 @@ impl<T> TypedArena<T> {
288293
// chunks.
289294
fn clear_last_chunk(&self, last_chunk: &mut ArenaChunk<T>) {
290295
// Determine how much was filled.
291-
let start = last_chunk.start() as usize;
296+
let start = last_chunk.start().addr();
292297
// We obtain the value of the pointer to the first uninitialized element.
293-
let end = self.ptr.get() as usize;
298+
let end = self.ptr.get().addr();
294299
// We then calculate the number of elements to be dropped in the last chunk,
295300
// which is the filled area's length.
296301
let diff = if mem::size_of::<T>() == 0 {
@@ -299,6 +304,8 @@ impl<T> TypedArena<T> {
299304
// Recall that `end` was incremented for each allocated value.
300305
end - start
301306
} else {
307+
// FIXME: this should *likely* use `offset_from`, but more
308+
// investigation is needed (including running tests in miri).
302309
(end - start) / mem::size_of::<T>()
303310
};
304311
// Pass that to the `destroy` method.
@@ -395,15 +402,16 @@ impl DroplessArena {
395402
/// request.
396403
#[inline]
397404
fn alloc_raw_without_grow(&self, layout: Layout) -> Option<*mut u8> {
398-
let start = self.start.get() as usize;
399-
let end = self.end.get() as usize;
405+
let start = self.start.get().addr();
406+
let old_end = self.end.get();
407+
let end = old_end.addr();
400408

401409
let align = layout.align();
402410
let bytes = layout.size();
403411

404412
let new_end = end.checked_sub(bytes)? & !(align - 1);
405413
if start <= new_end {
406-
let new_end = new_end as *mut u8;
414+
let new_end = old_end.with_addr(new_end);
407415
self.end.set(new_end);
408416
Some(new_end)
409417
} else {

compiler/rustc_codegen_ssa/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#![feature(once_cell)]
77
#![feature(nll)]
88
#![feature(associated_type_bounds)]
9+
#![feature(strict_provenance)]
910
#![recursion_limit = "256"]
1011
#![allow(rustc::potential_query_instability)]
1112

compiler/rustc_codegen_ssa/src/mono_item.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ impl<'a, 'tcx: 'a> MonoItemExt<'a, 'tcx> for MonoItem<'tcx> {
116116
fn to_raw_string(&self) -> String {
117117
match *self {
118118
MonoItem::Fn(instance) => {
119-
format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr() as usize)
119+
format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr().addr())
120120
}
121121
MonoItem::Static(id) => format!("Static({:?})", id),
122122
MonoItem::GlobalAsm(id) => format!("GlobalAsm({:?})", id),

library/alloc/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@
127127
#![feature(slice_ptr_len)]
128128
#![feature(slice_range)]
129129
#![feature(str_internals)]
130+
#![feature(strict_provenance)]
130131
#![feature(trusted_len)]
131132
#![feature(trusted_random_access)]
132133
#![feature(try_trait_v2)]

library/alloc/src/rc.rs

+2-3
Original file line numberDiff line numberDiff line change
@@ -2115,13 +2115,12 @@ impl<T> Weak<T> {
21152115
#[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")]
21162116
#[must_use]
21172117
pub const fn new() -> Weak<T> {
2118-
Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut RcBox<T>) } }
2118+
Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::<RcBox<T>>(usize::MAX)) } }
21192119
}
21202120
}
21212121

21222122
pub(crate) fn is_dangling<T: ?Sized>(ptr: *mut T) -> bool {
2123-
let address = ptr as *mut () as usize;
2124-
address == usize::MAX
2123+
(ptr as *mut ()).addr() == usize::MAX
21252124
}
21262125

21272126
/// Helper type to allow accessing the reference counts without

library/alloc/src/slice.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1043,9 +1043,9 @@ where
10431043

10441044
impl<T> Drop for MergeHole<T> {
10451045
fn drop(&mut self) {
1046-
// `T` is not a zero-sized type, so it's okay to divide by its size.
1047-
let len = (self.end as usize - self.start as usize) / mem::size_of::<T>();
1046+
// `T` is not a zero-sized type, and these are pointers into a slice's elements.
10481047
unsafe {
1048+
let len = self.end.offset_from(self.start) as usize;
10491049
ptr::copy_nonoverlapping(self.start, self.dest, len);
10501050
}
10511051
}

library/alloc/src/sync.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1746,7 +1746,7 @@ impl<T> Weak<T> {
17461746
#[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")]
17471747
#[must_use]
17481748
pub const fn new() -> Weak<T> {
1749-
Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut ArcInner<T>) } }
1749+
Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::<ArcInner<T>>(usize::MAX)) } }
17501750
}
17511751
}
17521752

library/alloc/src/vec/into_iter.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
159159
#[inline]
160160
fn size_hint(&self) -> (usize, Option<usize>) {
161161
let exact = if mem::size_of::<T>() == 0 {
162-
(self.end as usize).wrapping_sub(self.ptr as usize)
162+
self.end.addr().wrapping_sub(self.ptr.addr())
163163
} else {
164164
unsafe { self.end.offset_from(self.ptr) as usize }
165165
};

library/core/src/alloc/layout.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ impl Layout {
194194
#[inline]
195195
pub const fn dangling(&self) -> NonNull<u8> {
196196
// SAFETY: align is guaranteed to be non-zero
197-
unsafe { NonNull::new_unchecked(self.align() as *mut u8) }
197+
unsafe { NonNull::new_unchecked(crate::ptr::invalid_mut::<u8>(self.align())) }
198198
}
199199

200200
/// Creates a layout describing the record that can hold a value

library/core/src/fmt/mod.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,10 @@ impl<'a> ArgumentV1<'a> {
352352
}
353353

354354
fn as_usize(&self) -> Option<usize> {
355+
// We are type punning a bit here: USIZE_MARKER only takes an &usize but
356+
// formatter takes an &Opaque. Rust understandably doesn't think we should compare
357+
// the function pointers if they don't have the same signature, so we cast to
358+
// usizes to tell it that we just want to compare addresses.
355359
if self.formatter as usize == USIZE_MARKER as usize {
356360
// SAFETY: The `formatter` field is only set to USIZE_MARKER if
357361
// the value is a usize, so this is safe
@@ -2246,7 +2250,7 @@ impl<T: ?Sized> Pointer for *const T {
22462250
}
22472251
f.flags |= 1 << (FlagV1::Alternate as u32);
22482252

2249-
let ret = LowerHex::fmt(&(ptr as usize), f);
2253+
let ret = LowerHex::fmt(&(ptr.addr()), f);
22502254

22512255
f.width = old_width;
22522256
f.flags = old_flags;

library/core/src/hash/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -793,7 +793,7 @@ mod impls {
793793
#[inline]
794794
fn hash<H: Hasher>(&self, state: &mut H) {
795795
let (address, metadata) = self.to_raw_parts();
796-
state.write_usize(address as usize);
796+
state.write_usize(address.addr());
797797
metadata.hash(state);
798798
}
799799
}
@@ -803,7 +803,7 @@ mod impls {
803803
#[inline]
804804
fn hash<H: Hasher>(&self, state: &mut H) {
805805
let (address, metadata) = self.to_raw_parts();
806-
state.write_usize(address as usize);
806+
state.write_usize(address.addr());
807807
metadata.hash(state);
808808
}
809809
}

library/core/src/intrinsics.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -1972,15 +1972,15 @@ extern "rust-intrinsic" {
19721972
/// Checks whether `ptr` is properly aligned with respect to
19731973
/// `align_of::<T>()`.
19741974
pub(crate) fn is_aligned_and_not_null<T>(ptr: *const T) -> bool {
1975-
!ptr.is_null() && ptr as usize % mem::align_of::<T>() == 0
1975+
!ptr.is_null() && ptr.addr() % mem::align_of::<T>() == 0
19761976
}
19771977

19781978
/// Checks whether the regions of memory starting at `src` and `dst` of size
19791979
/// `count * size_of::<T>()` do *not* overlap.
19801980
#[cfg(debug_assertions)]
19811981
pub(crate) fn is_nonoverlapping<T>(src: *const T, dst: *const T, count: usize) -> bool {
1982-
let src_usize = src as usize;
1983-
let dst_usize = dst as usize;
1982+
let src_usize = src.addr();
1983+
let dst_usize = dst.addr();
19841984
let size = mem::size_of::<T>().checked_mul(count).unwrap();
19851985
let diff = if src_usize > dst_usize { src_usize - dst_usize } else { dst_usize - src_usize };
19861986
// If the absolute distance between the ptrs is at least as big as the size of the buffer,

library/core/src/ptr/const_ptr.rs

+74-1
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,79 @@ impl<T: ?Sized> *const T {
150150
bits as Self
151151
}
152152

153+
/// Gets the "address" portion of the pointer.
154+
///
155+
/// This is equivalent to `self as usize`, which semantically discards
156+
/// *provenance* and *address-space* information. To properly restore that information,
157+
/// use [`with_addr`][pointer::with_addr] or [`map_addr`][pointer::map_addr].
158+
///
159+
/// On most platforms this will produce a value with the same bytes as the original
160+
/// pointer, because all the bytes are dedicated to describing the address.
161+
/// Platforms which need to store additional information in the pointer may
162+
/// perform a change of representation to produce a value containing only the address
163+
/// portion of the pointer. What that means is up to the platform to define.
164+
///
165+
/// This API and its claimed semantics are part of the Strict Provenance experiment,
166+
/// see the [module documentation][crate::ptr] for details.
167+
#[must_use]
168+
#[inline]
169+
#[unstable(feature = "strict_provenance", issue = "95228")]
170+
pub fn addr(self) -> usize
171+
where
172+
T: Sized,
173+
{
174+
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
175+
self as usize
176+
}
177+
178+
/// Creates a new pointer with the given address.
179+
///
180+
/// This performs the same operation as an `addr as ptr` cast, but copies
181+
/// the *address-space* and *provenance* of `self` to the new pointer.
182+
/// This allows us to dynamically preserve and propagate this important
183+
/// information in a way that is otherwise impossible with a unary cast.
184+
///
185+
/// This is equivalent to using [`wrapping_offset`][pointer::wrapping_offset] to offset
186+
/// `self` to the given address, and therefore has all the same capabilities and restrictions.
187+
///
188+
/// This API and its claimed semantics are part of the Strict Provenance experiment,
189+
/// see the [module documentation][crate::ptr] for details.
190+
#[must_use]
191+
#[inline]
192+
#[unstable(feature = "strict_provenance", issue = "95228")]
193+
pub fn with_addr(self, addr: usize) -> Self
194+
where
195+
T: Sized,
196+
{
197+
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
198+
//
199+
// In the mean-time, this operation is defined to be "as if" it was
200+
// a wrapping_offset, so we can emulate it as such. This should properly
201+
// restore pointer provenance even under today's compiler.
202+
let self_addr = self.addr() as isize;
203+
let dest_addr = addr as isize;
204+
let offset = dest_addr.wrapping_sub(self_addr);
205+
206+
// This is the canonical desugarring of this operation
207+
self.cast::<u8>().wrapping_offset(offset).cast::<T>()
208+
}
209+
210+
/// Creates a new pointer by mapping `self`'s address to a new one.
211+
///
212+
/// This is a convenience for [`with_addr`][pointer::with_addr], see that method for details.
213+
///
214+
/// This API and its claimed semantics are part of the Strict Provenance experiment,
215+
/// see the [module documentation][crate::ptr] for details.
216+
#[must_use]
217+
#[inline]
218+
#[unstable(feature = "strict_provenance", issue = "95228")]
219+
pub fn map_addr(self, f: impl FnOnce(usize) -> usize) -> Self
220+
where
221+
T: Sized,
222+
{
223+
self.with_addr(f(self.addr()))
224+
}
225+
153226
/// Decompose a (possibly wide) pointer into its address and metadata components.
154227
///
155228
/// The pointer can be later reconstructed with [`from_raw_parts`].
@@ -1006,7 +1079,7 @@ impl<T> *const [T] {
10061079
/// use std::ptr;
10071080
///
10081081
/// let slice: *const [i8] = ptr::slice_from_raw_parts(ptr::null(), 3);
1009-
/// assert_eq!(slice.as_ptr(), 0 as *const i8);
1082+
/// assert_eq!(slice.as_ptr(), ptr::null());
10101083
/// ```
10111084
#[inline]
10121085
#[unstable(feature = "slice_ptr_get", issue = "74265")]

0 commit comments

Comments (0)