Commit 9d20fd1

Auto merge of #97684 - RalfJung:better-provenance-control, r=oli-obk
interpret: better control over whether we read data with provenance

The resolution in rust-lang/unsafe-code-guidelines#286 seems to be that when we load data at integer type, we implicitly strip provenance. So let's implement that in Miri, at least for scalar loads. This makes use of the fact that `Scalar` layouts distinguish pointer-sized integers and pointers -- so I was expecting some wild bugs where layouts set this incorrectly, but so far that does not seem to happen.

This does not entirely implement the solution to rust-lang/unsafe-code-guidelines#286; we still do the wrong thing for integers in larger types: we will `copy_op` them and then do validation, and validation will complain about the provenance. To fix that we need mutating validation; validation needs to strip the provenance rather than complain about it. That is a larger undertaking (but it will also help resolve rust-lang/miri#845, since we can then reset padding to `Uninit`).

The reason this is useful is that we can now implement `addr` as a `transmute` from a pointer to an integer, and actually get the desired behavior of stripping provenance without exposing it!
2 parents: 79b6bad + d208f80

22 files changed: +494 -404 lines
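To make the intended semantics concrete before diving into the diff, here is a minimal user-level sketch (my own illustration, not code from this commit; `ptr_addr` is a made-up name): under the model described above, a pointer-to-integer `transmute` is an integer-typed load of pointer data, so the provenance is stripped rather than exposed or rejected.

// Illustration only: loading pointer bytes at integer type strips provenance.
// The resulting integer carries no provenance, so it cannot be turned back into
// a dereferenceable pointer under strict provenance -- this is the `addr`-style
// behavior the commit message refers to.
fn ptr_addr(p: *const u8) -> usize {
    unsafe { std::mem::transmute::<*const u8, usize>(p) }
}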

compiler/rustc_const_eval/src/interpret/memory.rs (+19 -4)

@@ -908,11 +908,15 @@ impl<'tcx, 'a, Tag: Provenance, Extra> AllocRefMut<'a, 'tcx, Tag, Extra> {
 }

 impl<'tcx, 'a, Tag: Provenance, Extra> AllocRef<'a, 'tcx, Tag, Extra> {
-    pub fn read_scalar(&self, range: AllocRange) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
+    pub fn read_scalar(
+        &self,
+        range: AllocRange,
+        read_provenance: bool,
+    ) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
         let range = self.range.subrange(range);
         let res = self
             .alloc
-            .read_scalar(&self.tcx, range)
+            .read_scalar(&self.tcx, range, read_provenance)
             .map_err(|e| e.to_interp_error(self.alloc_id))?;
         debug!(
             "read_scalar in {} at {:#x}, size {}: {:?}",

@@ -924,8 +928,19 @@ impl<'tcx, 'a, Tag: Provenance, Extra> AllocRef<'a, 'tcx, Tag, Extra> {
         Ok(res)
     }

-    pub fn read_ptr_sized(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
-        self.read_scalar(alloc_range(offset, self.tcx.data_layout().pointer_size))
+    pub fn read_integer(
+        &self,
+        offset: Size,
+        size: Size,
+    ) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
+        self.read_scalar(alloc_range(offset, size), /*read_provenance*/ false)
+    }
+
+    pub fn read_pointer(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
+        self.read_scalar(
+            alloc_range(offset, self.tcx.data_layout().pointer_size),
+            /*read_provenance*/ true,
+        )
     }

     pub fn check_bytes(
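In caller terms, the single `read_ptr_sized` entry point is split into two helpers that state the intent explicitly. A minimal before/after sketch (the `alloc_ref`, `offset`, and `size` bindings are placeholders, not code from this commit):

// Before: always pointer-sized, provenance handling left implicit.
let val = alloc_ref.read_ptr_sized(offset)?;

// After: the caller says what kind of data it expects.
let int = alloc_ref.read_integer(offset, size)?;  // provenance ignored/stripped
let ptr = alloc_ref.read_pointer(offset)?;        // pointer-sized, provenance preserved

The traits.rs changes below are exactly this migration: vtable size and alignment become `read_integer`, while function pointers and vtable pointers become `read_pointer`.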

compiler/rustc_const_eval/src/interpret/operand.rs (+15 -6)

@@ -15,8 +15,8 @@ use rustc_target::abi::{VariantIdx, Variants};

 use super::{
     alloc_range, from_known_layout, mir_assign_valid_types, AllocId, ConstValue, GlobalId,
-    InterpCx, InterpResult, MPlaceTy, Machine, MemPlace, Place, PlaceTy, Pointer, Provenance,
-    Scalar, ScalarMaybeUninit,
+    InterpCx, InterpResult, MPlaceTy, Machine, MemPlace, Place, PlaceTy, Pointer,
+    PointerArithmetic, Provenance, Scalar, ScalarMaybeUninit,
 };

 /// An `Immediate` represents a single immediate self-contained Rust value.

@@ -284,11 +284,18 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             Abi::Scalar(s) if force => Some(s.primitive()),
             _ => None,
         };
-        if let Some(_s) = scalar_layout {
+        let read_provenance = |s: abi::Primitive, size| {
+            // Should be just `s.is_ptr()`, but we support a Miri flag that accepts more
+            // questionable ptr-int transmutes.
+            let number_may_have_provenance = !M::enforce_number_no_provenance(self);
+            s.is_ptr() || (number_may_have_provenance && size == self.pointer_size())
+        };
+        if let Some(s) = scalar_layout {
             //FIXME(#96185): let size = s.size(self);
             //FIXME(#96185): assert_eq!(size, mplace.layout.size, "abi::Scalar size does not match layout size");
             let size = mplace.layout.size; //FIXME(#96185): remove this line
-            let scalar = alloc.read_scalar(alloc_range(Size::ZERO, size))?;
+            let scalar =
+                alloc.read_scalar(alloc_range(Size::ZERO, size), read_provenance(s, size))?;
             return Ok(Some(ImmTy { imm: scalar.into(), layout: mplace.layout }));
         }
         let scalar_pair_layout = match mplace.layout.abi {

@@ -306,8 +313,10 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             let (a_size, b_size) = (a.size(self), b.size(self));
             let b_offset = a_size.align_to(b.align(self).abi);
             assert!(b_offset.bytes() > 0); // in `operand_field` we use the offset to tell apart the fields
-            let a_val = alloc.read_scalar(alloc_range(Size::ZERO, a_size))?;
-            let b_val = alloc.read_scalar(alloc_range(b_offset, b_size))?;
+            let a_val =
+                alloc.read_scalar(alloc_range(Size::ZERO, a_size), read_provenance(a, a_size))?;
+            let b_val =
+                alloc.read_scalar(alloc_range(b_offset, b_size), read_provenance(b, b_size))?;
             return Ok(Some(ImmTy {
                 imm: Immediate::ScalarPair(a_val, b_val),
                 layout: mplace.layout,

compiler/rustc_const_eval/src/interpret/traits.rs (+11 -8)

@@ -50,7 +50,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
         let vtable_slot = self
             .get_ptr_alloc(vtable_slot, ptr_size, self.tcx.data_layout.pointer_align.abi)?
             .expect("cannot be a ZST");
-        let fn_ptr = self.scalar_to_ptr(vtable_slot.read_ptr_sized(Size::ZERO)?.check_init()?)?;
+        let fn_ptr = self.scalar_to_ptr(vtable_slot.read_pointer(Size::ZERO)?.check_init()?)?;
         self.get_ptr_fn(fn_ptr)
     }

@@ -69,9 +69,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             )?
             .expect("cannot be a ZST");
         let drop_fn = vtable
-            .read_ptr_sized(
-                pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_DROPINPLACE).unwrap(),
-            )?
+            .read_pointer(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_DROPINPLACE).unwrap())?
             .check_init()?;
         // We *need* an instance here, no other kind of function value, to be able
         // to determine the type.

@@ -104,12 +102,18 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             )?
             .expect("cannot be a ZST");
         let size = vtable
-            .read_ptr_sized(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_SIZE).unwrap())?
+            .read_integer(
+                pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_SIZE).unwrap(),
+                pointer_size,
+            )?
             .check_init()?;
         let size = size.to_machine_usize(self)?;
         let size = Size::from_bytes(size);
         let align = vtable
-            .read_ptr_sized(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_ALIGN).unwrap())?
+            .read_integer(
+                pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_ALIGN).unwrap(),
+                pointer_size,
+            )?
             .check_init()?;
         let align = align.to_machine_usize(self)?;
         let align = Align::from_bytes(align).map_err(|e| err_ub!(InvalidVtableAlignment(e)))?;

@@ -132,8 +136,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
             .get_ptr_alloc(vtable_slot, pointer_size, self.tcx.data_layout.pointer_align.abi)?
             .expect("cannot be a ZST");

-        let new_vtable =
-            self.scalar_to_ptr(new_vtable.read_ptr_sized(Size::ZERO)?.check_init()?)?;
+        let new_vtable = self.scalar_to_ptr(new_vtable.read_pointer(Size::ZERO)?.check_init()?)?;

         Ok(new_vtable)
     }

compiler/rustc_middle/src/mir/interpret/allocation.rs (+53 -28)

@@ -264,9 +264,18 @@ impl<Tag, Extra> Allocation<Tag, Extra> {

 /// Byte accessors.
 impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
-    /// The last argument controls whether we error out when there are uninitialized
-    /// or pointer bytes. You should never call this, call `get_bytes` or
-    /// `get_bytes_with_uninit_and_ptr` instead,
+    /// This is the entirely abstraction-violating way to just grab the raw bytes without
+    /// caring about relocations. It just deduplicates some code between `read_scalar`
+    /// and `get_bytes_internal`.
+    fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] {
+        &self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
+    }
+
+    /// The last argument controls whether we error out when there are uninitialized or pointer
+    /// bytes. However, we *always* error when there are relocations overlapping the edges of the
+    /// range.
+    ///
+    /// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead,
     ///
     /// This function also guarantees that the resulting pointer will remain stable
     /// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies

@@ -287,7 +296,7 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
             self.check_relocation_edges(cx, range)?;
         }

-        Ok(&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
+        Ok(self.get_bytes_even_more_internal(range))
     }

     /// Checks that these bytes are initialized and not pointer bytes, and then return them

@@ -373,6 +382,9 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {

     /// Reads a *non-ZST* scalar.
     ///
+    /// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
+    /// supports that) provenance is entirely ignored.
+    ///
     /// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
     /// for ZSTness anyway due to integer pointers being valid for ZSTs.
     ///

@@ -382,35 +394,47 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
         &self,
         cx: &impl HasDataLayout,
         range: AllocRange,
+        read_provenance: bool,
     ) -> AllocResult<ScalarMaybeUninit<Tag>> {
-        // `get_bytes_with_uninit_and_ptr` tests relocation edges.
-        // We deliberately error when loading data that partially has provenance, or partially
-        // initialized data (that's the check below), into a scalar. The LLVM semantics of this are
-        // unclear so we are conservative. See <https://github.com/rust-lang/rust/issues/69488> for
-        // further discussion.
-        let bytes = self.get_bytes_with_uninit_and_ptr(cx, range)?;
-        // Uninit check happens *after* we established that the alignment is correct.
-        // We must not return `Ok()` for unaligned pointers!
+        if read_provenance {
+            assert_eq!(range.size, cx.data_layout().pointer_size);
+        }
+
+        // First and foremost, if anything is uninit, bail.
         if self.is_init(range).is_err() {
             // This inflates uninitialized bytes to the entire scalar, even if only a few
             // bytes are uninitialized.
             return Ok(ScalarMaybeUninit::Uninit);
         }
-        // Now we do the actual reading.
-        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
-        // See if we got a pointer.
-        if range.size != cx.data_layout().pointer_size {
-            // Not a pointer.
-            // *Now*, we better make sure that the inside is free of relocations too.
-            self.check_relocations(cx, range)?;
-        } else {
-            // Maybe a pointer.
-            if let Some(&prov) = self.relocations.get(&range.start) {
-                let ptr = Pointer::new(prov, Size::from_bytes(bits));
-                return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
-            }
+
+        // If we are doing a pointer read, and there is a relocation exactly where we
+        // are reading, then we can put data and relocation back together and return that.
+        if read_provenance && let Some(&prov) = self.relocations.get(&range.start) {
+            // We already checked init and relocations, so we can use this function.
+            let bytes = self.get_bytes_even_more_internal(range);
+            let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
+            let ptr = Pointer::new(prov, Size::from_bytes(bits));
+            return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
         }
-        // We don't. Just return the bits.
+
+        // If we are *not* reading a pointer, and we can just ignore relocations,
+        // then do exactly that.
+        if !read_provenance && Tag::OFFSET_IS_ADDR {
+            // We just strip provenance.
+            let bytes = self.get_bytes_even_more_internal(range);
+            let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
+            return Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)));
+        }
+
+        // It's complicated. Better make sure there is no provenance anywhere.
+        // FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then
+        // `read_pointer` is true and we ideally would distinguish the following two cases:
+        // - The entire `range` is covered by 2 relocations for the same provenance.
+        //   Then we should return a pointer with that provenance.
+        // - The range has inhomogeneous provenance. Then we should return just the
+        //   underlying bits.
+        let bytes = self.get_bytes(cx, range)?;
+        let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
         Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)))
     }

@@ -513,8 +537,9 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
         let start = range.start;
         let end = range.end();

-        // We need to handle clearing the relocations from parts of a pointer. See
-        // <https://github.com/rust-lang/rust/issues/87184> for details.
+        // We need to handle clearing the relocations from parts of a pointer.
+        // FIXME: Miri should preserve partial relocations; see
+        // https://github.com/rust-lang/miri/issues/2181.
        if first < start {
            if Tag::ERR_ON_PARTIAL_PTR_OVERWRITE {
                return Err(AllocError::PartialPointerOverwrite(first));
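The new doc comment on `read_scalar` states the contract; here is a hypothetical illustration of the two modes (assuming an allocation `alloc` that stores a pointer at offset 0, a pointer-sized `ptr_size`, and a machine where `Tag::OFFSET_IS_ADDR` holds; these bindings are mine, not code from the commit):

// Pointer-typed read: the relocation at offset 0 is recombined with the offset
// bits, so the returned scalar is a pointer with its provenance intact.
let as_ptr = alloc.read_scalar(cx, alloc_range(Size::ZERO, ptr_size), /*read_provenance*/ true)?;

// Integer-typed read of the same bytes: the raw bits come back as a plain
// integer and the provenance is silently stripped.
let as_int = alloc.read_scalar(cx, alloc_range(Size::ZERO, ptr_size), /*read_provenance*/ false)?;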

compiler/rustc_middle/src/mir/interpret/error.rs (+1 -1)

@@ -428,7 +428,7 @@ pub enum UnsupportedOpInfo {
     /// Encountered a pointer where we needed raw bytes.
     ReadPointerAsBytes,
     /// Overwriting parts of a pointer; the resulting state cannot be represented in our
-    /// `Allocation` data structure.
+    /// `Allocation` data structure. See <https://github.com/rust-lang/miri/issues/2181>.
     PartialPointerOverwrite(Pointer<AllocId>),
     //
     // The variants below are only reachable from CTFE/const prop, miri will never emit them.

compiler/rustc_target/src/abi/mod.rs (+5 -0)

@@ -747,6 +747,11 @@ impl Primitive {
     pub fn is_int(self) -> bool {
         matches!(self, Int(..))
     }
+
+    #[inline]
+    pub fn is_ptr(self) -> bool {
+        matches!(self, Pointer)
+    }
 }

 /// Inclusive wrap-around range of valid values, that is, if
