Skip to content

Commit 9d9d591

Browse files
authoredApr 19, 2022
Rollup merge of rust-lang#96165 - RalfJung:miri-provenance-cleanup, r=oli-obk
Miri provenance cleanup Reviewing rust-lang#95826 by ``@carbotaniuman`` made me realize that we could clean things up a little here. ``@carbotaniuman`` please let me know if you're okay with landing this (it will create a lot of conflicts with your PR), or if you'd prefer incorporating the ideas from this PR into yours. I think we want to end up in a situation where the function you called `ptr_reify_alloc` returns just two things, a concrete tag and an offset. Getting an `AllocId` from a concrete tag should be infallible like now. However a concrete tag and `Tag` don't have to be the same type. r? ``@oli-obk``
2 parents f7d8f5b + 55f0977 commit 9d9d591

File tree

5 files changed

+76
-46
lines changed

5 files changed

+76
-46
lines changed
 

‎compiler/rustc_const_eval/src/const_eval/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pub(crate) fn const_caller_location(
3838
if intern_const_alloc_recursive(&mut ecx, InternKind::Constant, &loc_place).is_err() {
3939
bug!("intern_const_alloc_recursive should not error in this case")
4040
}
41-
ConstValue::Scalar(Scalar::from_pointer(loc_place.ptr.into_pointer_or_addr().unwrap(), &tcx))
41+
ConstValue::Scalar(Scalar::from_maybe_pointer(loc_place.ptr, &tcx))
4242
}
4343

4444
/// Convert an evaluated constant to a type level constant

‎compiler/rustc_const_eval/src/interpret/machine.rs

+21-7
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ pub trait Machine<'mir, 'tcx>: Sized {
8888
/// Pointers are "tagged" with provenance information; typically the `AllocId` they belong to.
8989
type PointerTag: Provenance + Eq + Hash + 'static;
9090

91+
/// When getting the AllocId of a pointer, some extra data is also obtained from the tag
92+
/// that is passed to memory access hooks so they can do things with it.
93+
type TagExtra: Copy + 'static;
94+
9195
/// Machines can define extra (non-instance) things that represent values of function pointers.
9296
/// For example, Miri uses this to return a function pointer from `dlsym`
9397
/// that can later be called to execute the right thing.
@@ -122,6 +126,8 @@ pub trait Machine<'mir, 'tcx>: Sized {
122126

123127
/// Whether, when checking alignment, we should `force_int` and thus support
124128
/// custom alignment logic based on whatever the integer address happens to be.
129+
///
130+
/// Requires PointerTag::OFFSET_IS_ADDR to be true.
125131
fn force_int_for_alignment_check(ecx: &InterpCx<'mir, 'tcx, Self>) -> bool;
126132

127133
/// Whether to enforce the validity invariant
@@ -285,11 +291,14 @@ pub trait Machine<'mir, 'tcx>: Sized {
285291
addr: u64,
286292
) -> Pointer<Option<Self::PointerTag>>;
287293

288-
/// Convert a pointer with provenance into an allocation-offset pair.
294+
/// Convert a pointer with provenance into an allocation-offset pair
295+
/// and extra provenance info.
296+
///
297+
/// The returned `AllocId` must be the same as `ptr.provenance.get_alloc_id()`.
289298
fn ptr_get_alloc(
290299
ecx: &InterpCx<'mir, 'tcx, Self>,
291300
ptr: Pointer<Self::PointerTag>,
292-
) -> (AllocId, Size);
301+
) -> (AllocId, Size, Self::TagExtra);
293302

294303
/// Called to initialize the "extra" state of an allocation and make the pointers
295304
/// it contains (in relocations) tagged. The way we construct allocations is
@@ -321,7 +330,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
321330
_tcx: TyCtxt<'tcx>,
322331
_machine: &Self,
323332
_alloc_extra: &Self::AllocExtra,
324-
_tag: Self::PointerTag,
333+
_tag: (AllocId, Self::TagExtra),
325334
_range: AllocRange,
326335
) -> InterpResult<'tcx> {
327336
Ok(())
@@ -333,7 +342,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
333342
_tcx: TyCtxt<'tcx>,
334343
_machine: &mut Self,
335344
_alloc_extra: &mut Self::AllocExtra,
336-
_tag: Self::PointerTag,
345+
_tag: (AllocId, Self::TagExtra),
337346
_range: AllocRange,
338347
) -> InterpResult<'tcx> {
339348
Ok(())
@@ -345,7 +354,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
345354
_tcx: TyCtxt<'tcx>,
346355
_machine: &mut Self,
347356
_alloc_extra: &mut Self::AllocExtra,
348-
_tag: Self::PointerTag,
357+
_tag: (AllocId, Self::TagExtra),
349358
_range: AllocRange,
350359
) -> InterpResult<'tcx> {
351360
Ok(())
@@ -397,6 +406,8 @@ pub trait Machine<'mir, 'tcx>: Sized {
397406
// (CTFE and ConstProp) use the same instance. Here, we share that code.
398407
pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
399408
type PointerTag = AllocId;
409+
type TagExtra = ();
410+
400411
type ExtraFnVal = !;
401412

402413
type MemoryMap =
@@ -474,9 +485,12 @@ pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
474485
}
475486

476487
#[inline(always)]
477-
fn ptr_get_alloc(_ecx: &InterpCx<$mir, $tcx, Self>, ptr: Pointer<AllocId>) -> (AllocId, Size) {
488+
fn ptr_get_alloc(
489+
_ecx: &InterpCx<$mir, $tcx, Self>,
490+
ptr: Pointer<AllocId>,
491+
) -> (AllocId, Size, Self::TagExtra) {
478492
// We know `offset` is relative to the allocation, so we can use `into_parts`.
479493
let (alloc_id, offset) = ptr.into_parts();
480-
(alloc_id, offset)
494+
(alloc_id, offset, ())
481495
}
482496
}

‎compiler/rustc_const_eval/src/interpret/memory.rs

+36-37
Original file line numberDiff line numberDiff line change
@@ -158,8 +158,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
158158
&self,
159159
ptr: Pointer<AllocId>,
160160
) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
161-
// We know `offset` is relative to the allocation, so we can use `into_parts`.
162-
let (alloc_id, offset) = ptr.into_parts();
161+
let alloc_id = ptr.provenance;
163162
// We need to handle `extern static`.
164163
match self.tcx.get_global_alloc(alloc_id) {
165164
Some(GlobalAlloc::Static(def_id)) if self.tcx.is_thread_local_static(def_id) => {
@@ -171,7 +170,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
171170
_ => {}
172171
}
173172
// And we need to get the tag.
174-
Ok(M::tag_alloc_base_pointer(self, Pointer::new(alloc_id, offset)))
173+
Ok(M::tag_alloc_base_pointer(self, ptr))
175174
}
176175

177176
pub fn create_fn_alloc_ptr(
@@ -238,7 +237,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
238237
new_align: Align,
239238
kind: MemoryKind<M::MemoryKind>,
240239
) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
241-
let (alloc_id, offset, ptr) = self.ptr_get_alloc_id(ptr)?;
240+
let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?;
242241
if offset.bytes() != 0 {
243242
throw_ub_format!(
244243
"reallocating {:?} which does not point to the beginning of an object",
@@ -255,14 +254,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
255254
};
256255
// This will also call the access hooks.
257256
self.mem_copy(
258-
ptr.into(),
257+
ptr,
259258
Align::ONE,
260259
new_ptr.into(),
261260
Align::ONE,
262261
old_size.min(new_size),
263262
/*nonoverlapping*/ true,
264263
)?;
265-
self.deallocate_ptr(ptr.into(), old_size_and_align, kind)?;
264+
self.deallocate_ptr(ptr, old_size_and_align, kind)?;
266265

267266
Ok(new_ptr)
268267
}
@@ -274,7 +273,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
274273
old_size_and_align: Option<(Size, Align)>,
275274
kind: MemoryKind<M::MemoryKind>,
276275
) -> InterpResult<'tcx> {
277-
let (alloc_id, offset, ptr) = self.ptr_get_alloc_id(ptr)?;
276+
let (alloc_id, offset, tag) = self.ptr_get_alloc_id(ptr)?;
278277
trace!("deallocating: {}", alloc_id);
279278

280279
if offset.bytes() != 0 {
@@ -330,7 +329,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
330329
*self.tcx,
331330
&mut self.machine,
332331
&mut alloc.extra,
333-
ptr.provenance,
332+
(alloc_id, tag),
334333
alloc_range(Size::ZERO, size),
335334
)?;
336335

@@ -350,17 +349,17 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
350349
ptr: Pointer<Option<M::PointerTag>>,
351350
size: Size,
352351
align: Align,
353-
) -> InterpResult<'tcx, Option<(AllocId, Size, Pointer<M::PointerTag>)>> {
352+
) -> InterpResult<'tcx, Option<(AllocId, Size, M::TagExtra)>> {
354353
let align = M::enforce_alignment(&self).then_some(align);
355354
self.check_and_deref_ptr(
356355
ptr,
357356
size,
358357
align,
359358
CheckInAllocMsg::MemoryAccessTest,
360-
|alloc_id, offset, ptr| {
359+
|alloc_id, offset, tag| {
361360
let (size, align) =
362361
self.get_alloc_size_and_align(alloc_id, AllocCheck::Dereferenceable)?;
363-
Ok((size, align, (alloc_id, offset, ptr)))
362+
Ok((size, align, (alloc_id, offset, tag)))
364363
},
365364
)
366365
}
@@ -401,11 +400,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
401400
size: Size,
402401
align: Option<Align>,
403402
msg: CheckInAllocMsg,
404-
alloc_size: impl FnOnce(
405-
AllocId,
406-
Size,
407-
Pointer<M::PointerTag>,
408-
) -> InterpResult<'tcx, (Size, Align, T)>,
403+
alloc_size: impl FnOnce(AllocId, Size, M::TagExtra) -> InterpResult<'tcx, (Size, Align, T)>,
409404
) -> InterpResult<'tcx, Option<T>> {
410405
fn check_offset_align(offset: u64, align: Align) -> InterpResult<'static> {
411406
if offset % align.bytes() == 0 {
@@ -433,8 +428,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
433428
}
434429
None
435430
}
436-
Ok((alloc_id, offset, ptr)) => {
437-
let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, ptr)?;
431+
Ok((alloc_id, offset, tag)) => {
432+
let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, tag)?;
438433
// Test bounds. This also ensures non-null.
439434
// It is sufficient to check this for the end pointer. Also check for overflow!
440435
if offset.checked_add(size, &self.tcx).map_or(true, |end| end > alloc_size) {
@@ -450,10 +445,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
450445
// we want the error to be about the bounds.
451446
if let Some(align) = align {
452447
if M::force_int_for_alignment_check(self) {
453-
let addr = Scalar::from_pointer(ptr, &self.tcx)
454-
.to_machine_usize(&self.tcx)
455-
.expect("ptr-to-int cast for align check should never fail");
456-
check_offset_align(addr, align)?;
448+
// `force_int_for_alignment_check` can only be true if `OFFSET_IS_ADDR` is true.
449+
check_offset_align(ptr.addr().bytes(), align)?;
457450
} else {
458451
// Check allocation alignment and offset alignment.
459452
if alloc_align.bytes() < align.bytes() {
@@ -569,14 +562,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
569562
size,
570563
align,
571564
CheckInAllocMsg::MemoryAccessTest,
572-
|alloc_id, offset, ptr| {
565+
|alloc_id, offset, tag| {
573566
let alloc = self.get_alloc_raw(alloc_id)?;
574-
Ok((alloc.size(), alloc.align, (alloc_id, offset, ptr, alloc)))
567+
Ok((alloc.size(), alloc.align, (alloc_id, offset, tag, alloc)))
575568
},
576569
)?;
577-
if let Some((alloc_id, offset, ptr, alloc)) = ptr_and_alloc {
570+
if let Some((alloc_id, offset, tag, alloc)) = ptr_and_alloc {
578571
let range = alloc_range(offset, size);
579-
M::memory_read(*self.tcx, &self.machine, &alloc.extra, ptr.provenance, range)?;
572+
M::memory_read(*self.tcx, &self.machine, &alloc.extra, (alloc_id, tag), range)?;
580573
Ok(Some(AllocRef { alloc, range, tcx: *self.tcx, alloc_id }))
581574
} else {
582575
// Even in this branch we have to be sure that we actually access the allocation, in
@@ -631,13 +624,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
631624
align: Align,
632625
) -> InterpResult<'tcx, Option<AllocRefMut<'a, 'tcx, M::PointerTag, M::AllocExtra>>> {
633626
let parts = self.get_ptr_access(ptr, size, align)?;
634-
if let Some((alloc_id, offset, ptr)) = parts {
627+
if let Some((alloc_id, offset, tag)) = parts {
635628
let tcx = *self.tcx;
636629
// FIXME: can we somehow avoid looking up the allocation twice here?
637630
// We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`.
638631
let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?;
639632
let range = alloc_range(offset, size);
640-
M::memory_written(tcx, machine, &mut alloc.extra, ptr.provenance, range)?;
633+
M::memory_written(tcx, machine, &mut alloc.extra, (alloc_id, tag), range)?;
641634
Ok(Some(AllocRefMut { alloc, range, tcx, alloc_id }))
642635
} else {
643636
Ok(None)
@@ -732,7 +725,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
732725
ptr: Pointer<Option<M::PointerTag>>,
733726
) -> InterpResult<'tcx, FnVal<'tcx, M::ExtraFnVal>> {
734727
trace!("get_fn({:?})", ptr);
735-
let (alloc_id, offset, _ptr) = self.ptr_get_alloc_id(ptr)?;
728+
let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?;
736729
if offset.bytes() != 0 {
737730
throw_ub!(InvalidFunctionPointer(Pointer::new(alloc_id, offset)))
738731
}
@@ -1012,16 +1005,16 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
10121005
// and once below to get the underlying `&[mut] Allocation`.
10131006

10141007
// Source alloc preparations and access hooks.
1015-
let Some((src_alloc_id, src_offset, src)) = src_parts else {
1008+
let Some((src_alloc_id, src_offset, src_tag)) = src_parts else {
10161009
// Zero-sized *source*, that means dst is also zero-sized and we have nothing to do.
10171010
return Ok(());
10181011
};
10191012
let src_alloc = self.get_alloc_raw(src_alloc_id)?;
10201013
let src_range = alloc_range(src_offset, size);
1021-
M::memory_read(*tcx, &self.machine, &src_alloc.extra, src.provenance, src_range)?;
1014+
M::memory_read(*tcx, &self.machine, &src_alloc.extra, (src_alloc_id, src_tag), src_range)?;
10221015
// We need the `dest` ptr for the next operation, so we get it now.
10231016
// We already did the source checks and called the hooks so we are good to return early.
1024-
let Some((dest_alloc_id, dest_offset, dest)) = dest_parts else {
1017+
let Some((dest_alloc_id, dest_offset, dest_tag)) = dest_parts else {
10251018
// Zero-sized *destination*.
10261019
return Ok(());
10271020
};
@@ -1043,7 +1036,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
10431036
// Destination alloc preparations and access hooks.
10441037
let (dest_alloc, extra) = self.get_alloc_raw_mut(dest_alloc_id)?;
10451038
let dest_range = alloc_range(dest_offset, size * num_copies);
1046-
M::memory_written(*tcx, extra, &mut dest_alloc.extra, dest.provenance, dest_range)?;
1039+
M::memory_written(
1040+
*tcx,
1041+
extra,
1042+
&mut dest_alloc.extra,
1043+
(dest_alloc_id, dest_tag),
1044+
dest_range,
1045+
)?;
10471046
let dest_bytes = dest_alloc
10481047
.get_bytes_mut_ptr(&tcx, dest_range)
10491048
.map_err(|e| e.to_interp_error(dest_alloc_id))?
@@ -1164,11 +1163,11 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
11641163
pub fn ptr_try_get_alloc_id(
11651164
&self,
11661165
ptr: Pointer<Option<M::PointerTag>>,
1167-
) -> Result<(AllocId, Size, Pointer<M::PointerTag>), u64> {
1166+
) -> Result<(AllocId, Size, M::TagExtra), u64> {
11681167
match ptr.into_pointer_or_addr() {
11691168
Ok(ptr) => {
1170-
let (alloc_id, offset) = M::ptr_get_alloc(self, ptr);
1171-
Ok((alloc_id, offset, ptr))
1169+
let (alloc_id, offset, extra) = M::ptr_get_alloc(self, ptr);
1170+
Ok((alloc_id, offset, extra))
11721171
}
11731172
Err(addr) => Err(addr.bytes()),
11741173
}
@@ -1179,7 +1178,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
11791178
pub fn ptr_get_alloc_id(
11801179
&self,
11811180
ptr: Pointer<Option<M::PointerTag>>,
1182-
) -> InterpResult<'tcx, (AllocId, Size, Pointer<M::PointerTag>)> {
1181+
) -> InterpResult<'tcx, (AllocId, Size, M::TagExtra)> {
11831182
self.ptr_try_get_alloc_id(ptr).map_err(|offset| {
11841183
err_ub!(DanglingIntPointer(offset, CheckInAllocMsg::InboundsTest)).into()
11851184
})

‎compiler/rustc_const_eval/src/interpret/validity.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -432,7 +432,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
432432
if let Some(ref mut ref_tracking) = self.ref_tracking {
433433
// Proceed recursively even for ZST, no reason to skip them!
434434
// `!` is a ZST and we want to validate it.
435-
if let Ok((alloc_id, _offset, _ptr)) = self.ecx.ptr_try_get_alloc_id(place.ptr) {
435+
if let Ok((alloc_id, _offset, _tag)) = self.ecx.ptr_try_get_alloc_id(place.ptr) {
436436
// Special handling for pointers to statics (irrespective of their type).
437437
let alloc_kind = self.ecx.tcx.get_global_alloc(alloc_id);
438438
if let Some(GlobalAlloc::Static(did)) = alloc_kind {

‎compiler/rustc_middle/src/mir/interpret/pointer.rs

+17
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@ pub struct Pointer<Tag = AllocId> {
163163
}
164164

165165
static_assert_size!(Pointer, 16);
166+
// `Option<Tag>` pointers are also passed around quite a bit
167+
// (but not stored in permanent machine state).
168+
static_assert_size!(Pointer<Option<AllocId>>, 16);
166169

167170
// We want the `Debug` output to be readable as it is used by `derive(Debug)` for
168171
// all the Miri types.
@@ -198,12 +201,26 @@ impl<Tag> From<Pointer<Tag>> for Pointer<Option<Tag>> {
198201
}
199202

200203
impl<Tag> Pointer<Option<Tag>> {
204+
/// Convert this pointer that *might* have a tag into a pointer that *definitely* has a tag, or
205+
/// an absolute address.
206+
///
207+
/// This is rarely what you want; call `ptr_try_get_alloc_id` instead.
201208
pub fn into_pointer_or_addr(self) -> Result<Pointer<Tag>, Size> {
202209
match self.provenance {
203210
Some(tag) => Ok(Pointer::new(tag, self.offset)),
204211
None => Err(self.offset),
205212
}
206213
}
214+
215+
/// Returns the absolute address the pointer points to.
216+
/// Only works if Tag::OFFSET_IS_ADDR is true!
217+
pub fn addr(self) -> Size
218+
where
219+
Tag: Provenance,
220+
{
221+
assert!(Tag::OFFSET_IS_ADDR);
222+
self.offset
223+
}
207224
}
208225

209226
impl<Tag> Pointer<Option<Tag>> {

0 commit comments

Comments
 (0)
Please sign in to comment.