Skip to content

Commit

Permalink
Auto merge of #97684 - RalfJung:better-provenance-control, r=oli-obk
Browse files Browse the repository at this point in the history
interpret: better control over whether we read data with provenance

The resolution in rust-lang/unsafe-code-guidelines#286 seems to be that when we load data at integer type, we implicitly strip provenance. So let's implement that in Miri at least for scalar loads. This makes use of the fact that `Scalar` layouts distinguish pointer-sized integers and pointers -- so I was expecting some wild bugs where layouts set this incorrectly, but so far that does not seem to happen.

This does not entirely implement the solution to rust-lang/unsafe-code-guidelines#286; we still do the wrong thing for integers in larger types: we will `copy_op` them and then do validation, and validation will complain about the provenance. To fix that we need mutating validation; validation needs to strip the provenance rather than complaining about it. This is a larger undertaking (but will also help resolve rust-lang/miri#845 since we can reset padding to `Uninit`).

The reason this is useful is that we can now implement `addr` as a `transmute` from a pointer to an integer, and actually get the desired behavior of stripping provenance without exposing it!
  • Loading branch information
bors committed Jun 6, 2022
2 parents 79b6bad + d208f80 commit 9d20fd1
Show file tree
Hide file tree
Showing 22 changed files with 494 additions and 404 deletions.
23 changes: 19 additions & 4 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -908,11 +908,15 @@ impl<'tcx, 'a, Tag: Provenance, Extra> AllocRefMut<'a, 'tcx, Tag, Extra> {
}

impl<'tcx, 'a, Tag: Provenance, Extra> AllocRef<'a, 'tcx, Tag, Extra> {
pub fn read_scalar(&self, range: AllocRange) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
pub fn read_scalar(
&self,
range: AllocRange,
read_provenance: bool,
) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
let range = self.range.subrange(range);
let res = self
.alloc
.read_scalar(&self.tcx, range)
.read_scalar(&self.tcx, range, read_provenance)
.map_err(|e| e.to_interp_error(self.alloc_id))?;
debug!(
"read_scalar in {} at {:#x}, size {}: {:?}",
Expand All @@ -924,8 +928,19 @@ impl<'tcx, 'a, Tag: Provenance, Extra> AllocRef<'a, 'tcx, Tag, Extra> {
Ok(res)
}

pub fn read_ptr_sized(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
self.read_scalar(alloc_range(offset, self.tcx.data_layout().pointer_size))
pub fn read_integer(
&self,
offset: Size,
size: Size,
) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
self.read_scalar(alloc_range(offset, size), /*read_provenance*/ false)
}

pub fn read_pointer(&self, offset: Size) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
self.read_scalar(
alloc_range(offset, self.tcx.data_layout().pointer_size),
/*read_provenance*/ true,
)
}

pub fn check_bytes(
Expand Down
21 changes: 15 additions & 6 deletions compiler/rustc_const_eval/src/interpret/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ use rustc_target::abi::{VariantIdx, Variants};

use super::{
alloc_range, from_known_layout, mir_assign_valid_types, AllocId, ConstValue, GlobalId,
InterpCx, InterpResult, MPlaceTy, Machine, MemPlace, Place, PlaceTy, Pointer, Provenance,
Scalar, ScalarMaybeUninit,
InterpCx, InterpResult, MPlaceTy, Machine, MemPlace, Place, PlaceTy, Pointer,
PointerArithmetic, Provenance, Scalar, ScalarMaybeUninit,
};

/// An `Immediate` represents a single immediate self-contained Rust value.
Expand Down Expand Up @@ -284,11 +284,18 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
Abi::Scalar(s) if force => Some(s.primitive()),
_ => None,
};
if let Some(_s) = scalar_layout {
let read_provenance = |s: abi::Primitive, size| {
// Should be just `s.is_ptr()`, but we support a Miri flag that accepts more
// questionable ptr-int transmutes.
let number_may_have_provenance = !M::enforce_number_no_provenance(self);
s.is_ptr() || (number_may_have_provenance && size == self.pointer_size())
};
if let Some(s) = scalar_layout {
//FIXME(#96185): let size = s.size(self);
//FIXME(#96185): assert_eq!(size, mplace.layout.size, "abi::Scalar size does not match layout size");
let size = mplace.layout.size; //FIXME(#96185): remove this line
let scalar = alloc.read_scalar(alloc_range(Size::ZERO, size))?;
let scalar =
alloc.read_scalar(alloc_range(Size::ZERO, size), read_provenance(s, size))?;
return Ok(Some(ImmTy { imm: scalar.into(), layout: mplace.layout }));
}
let scalar_pair_layout = match mplace.layout.abi {
Expand All @@ -306,8 +313,10 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let (a_size, b_size) = (a.size(self), b.size(self));
let b_offset = a_size.align_to(b.align(self).abi);
assert!(b_offset.bytes() > 0); // in `operand_field` we use the offset to tell apart the fields
let a_val = alloc.read_scalar(alloc_range(Size::ZERO, a_size))?;
let b_val = alloc.read_scalar(alloc_range(b_offset, b_size))?;
let a_val =
alloc.read_scalar(alloc_range(Size::ZERO, a_size), read_provenance(a, a_size))?;
let b_val =
alloc.read_scalar(alloc_range(b_offset, b_size), read_provenance(b, b_size))?;
return Ok(Some(ImmTy {
imm: Immediate::ScalarPair(a_val, b_val),
layout: mplace.layout,
Expand Down
19 changes: 11 additions & 8 deletions compiler/rustc_const_eval/src/interpret/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let vtable_slot = self
.get_ptr_alloc(vtable_slot, ptr_size, self.tcx.data_layout.pointer_align.abi)?
.expect("cannot be a ZST");
let fn_ptr = self.scalar_to_ptr(vtable_slot.read_ptr_sized(Size::ZERO)?.check_init()?)?;
let fn_ptr = self.scalar_to_ptr(vtable_slot.read_pointer(Size::ZERO)?.check_init()?)?;
self.get_ptr_fn(fn_ptr)
}

Expand All @@ -69,9 +69,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
)?
.expect("cannot be a ZST");
let drop_fn = vtable
.read_ptr_sized(
pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_DROPINPLACE).unwrap(),
)?
.read_pointer(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_DROPINPLACE).unwrap())?
.check_init()?;
// We *need* an instance here, no other kind of function value, to be able
// to determine the type.
Expand Down Expand Up @@ -104,12 +102,18 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
)?
.expect("cannot be a ZST");
let size = vtable
.read_ptr_sized(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_SIZE).unwrap())?
.read_integer(
pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_SIZE).unwrap(),
pointer_size,
)?
.check_init()?;
let size = size.to_machine_usize(self)?;
let size = Size::from_bytes(size);
let align = vtable
.read_ptr_sized(pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_ALIGN).unwrap())?
.read_integer(
pointer_size * u64::try_from(COMMON_VTABLE_ENTRIES_ALIGN).unwrap(),
pointer_size,
)?
.check_init()?;
let align = align.to_machine_usize(self)?;
let align = Align::from_bytes(align).map_err(|e| err_ub!(InvalidVtableAlignment(e)))?;
Expand All @@ -132,8 +136,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
.get_ptr_alloc(vtable_slot, pointer_size, self.tcx.data_layout.pointer_align.abi)?
.expect("cannot be a ZST");

let new_vtable =
self.scalar_to_ptr(new_vtable.read_ptr_sized(Size::ZERO)?.check_init()?)?;
let new_vtable = self.scalar_to_ptr(new_vtable.read_pointer(Size::ZERO)?.check_init()?)?;

Ok(new_vtable)
}
Expand Down
81 changes: 53 additions & 28 deletions compiler/rustc_middle/src/mir/interpret/allocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,9 +264,18 @@ impl<Tag, Extra> Allocation<Tag, Extra> {

/// Byte accessors.
impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
/// The last argument controls whether we error out when there are uninitialized
/// or pointer bytes. You should never call this, call `get_bytes` or
/// `get_bytes_with_uninit_and_ptr` instead,
/// This is the entirely abstraction-violating way to just grab the raw bytes without
/// caring about relocations. It just deduplicates some code between `read_scalar`
/// and `get_bytes_internal`.
fn get_bytes_even_more_internal(&self, range: AllocRange) -> &[u8] {
&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()]
}

/// The last argument controls whether we error out when there are uninitialized or pointer
/// bytes. However, we *always* error when there are relocations overlapping the edges of the
/// range.
///
/// You should never call this, call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead,
///
/// This function also guarantees that the resulting pointer will remain stable
/// even when new allocations are pushed to the `HashMap`. `mem_copy_repeatedly` relies
Expand All @@ -287,7 +296,7 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
self.check_relocation_edges(cx, range)?;
}

Ok(&self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
Ok(self.get_bytes_even_more_internal(range))
}

/// Checks that these bytes are initialized and not pointer bytes, and then return them
Expand Down Expand Up @@ -373,6 +382,9 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {

/// Reads a *non-ZST* scalar.
///
/// If `read_provenance` is `true`, this will also read provenance; otherwise (if the machine
/// supports that) provenance is entirely ignored.
///
/// ZSTs can't be read because in order to obtain a `Pointer`, we need to check
/// for ZSTness anyway due to integer pointers being valid for ZSTs.
///
Expand All @@ -382,35 +394,47 @@ impl<Tag: Provenance, Extra> Allocation<Tag, Extra> {
&self,
cx: &impl HasDataLayout,
range: AllocRange,
read_provenance: bool,
) -> AllocResult<ScalarMaybeUninit<Tag>> {
// `get_bytes_with_uninit_and_ptr` tests relocation edges.
// We deliberately error when loading data that partially has provenance, or partially
// initialized data (that's the check below), into a scalar. The LLVM semantics of this are
// unclear so we are conservative. See <https://github.com/rust-lang/rust/issues/69488> for
// further discussion.
let bytes = self.get_bytes_with_uninit_and_ptr(cx, range)?;
// Uninit check happens *after* we established that the alignment is correct.
// We must not return `Ok()` for unaligned pointers!
if read_provenance {
assert_eq!(range.size, cx.data_layout().pointer_size);
}

// First and foremost, if anything is uninit, bail.
if self.is_init(range).is_err() {
// This inflates uninitialized bytes to the entire scalar, even if only a few
// bytes are uninitialized.
return Ok(ScalarMaybeUninit::Uninit);
}
// Now we do the actual reading.
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
// See if we got a pointer.
if range.size != cx.data_layout().pointer_size {
// Not a pointer.
// *Now*, we better make sure that the inside is free of relocations too.
self.check_relocations(cx, range)?;
} else {
// Maybe a pointer.
if let Some(&prov) = self.relocations.get(&range.start) {
let ptr = Pointer::new(prov, Size::from_bytes(bits));
return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
}

// If we are doing a pointer read, and there is a relocation exactly where we
// are reading, then we can put data and relocation back together and return that.
if read_provenance && let Some(&prov) = self.relocations.get(&range.start) {
// We already checked init and relocations, so we can use this function.
let bytes = self.get_bytes_even_more_internal(range);
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
let ptr = Pointer::new(prov, Size::from_bytes(bits));
return Ok(ScalarMaybeUninit::from_pointer(ptr, cx));
}
// We don't. Just return the bits.

// If we are *not* reading a pointer, and we can just ignore relocations,
// then do exactly that.
if !read_provenance && Tag::OFFSET_IS_ADDR {
// We just strip provenance.
let bytes = self.get_bytes_even_more_internal(range);
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
return Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)));
}

// It's complicated. Better make sure there is no provenance anywhere.
// FIXME: If !OFFSET_IS_ADDR, this is the best we can do. But if OFFSET_IS_ADDR, then
// `read_pointer` is true and we ideally would distinguish the following two cases:
// - The entire `range` is covered by 2 relocations for the same provenance.
// Then we should return a pointer with that provenance.
// - The range has inhomogeneous provenance. Then we should return just the
// underlying bits.
let bytes = self.get_bytes(cx, range)?;
let bits = read_target_uint(cx.data_layout().endian, bytes).unwrap();
Ok(ScalarMaybeUninit::Scalar(Scalar::from_uint(bits, range.size)))
}

Expand Down Expand Up @@ -513,8 +537,9 @@ impl<Tag: Copy, Extra> Allocation<Tag, Extra> {
let start = range.start;
let end = range.end();

// We need to handle clearing the relocations from parts of a pointer. See
// <https://github.com/rust-lang/rust/issues/87184> for details.
// We need to handle clearing the relocations from parts of a pointer.
// FIXME: Miri should preserve partial relocations; see
// https://github.com/rust-lang/miri/issues/2181.
if first < start {
if Tag::ERR_ON_PARTIAL_PTR_OVERWRITE {
return Err(AllocError::PartialPointerOverwrite(first));
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_middle/src/mir/interpret/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,7 @@ pub enum UnsupportedOpInfo {
/// Encountered a pointer where we needed raw bytes.
ReadPointerAsBytes,
/// Overwriting parts of a pointer; the resulting state cannot be represented in our
/// `Allocation` data structure.
/// `Allocation` data structure. See <https://github.com/rust-lang/miri/issues/2181>.
PartialPointerOverwrite(Pointer<AllocId>),
//
// The variants below are only reachable from CTFE/const prop, miri will never emit them.
Expand Down
5 changes: 5 additions & 0 deletions compiler/rustc_target/src/abi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,11 @@ impl Primitive {
pub fn is_int(self) -> bool {
matches!(self, Int(..))
}

#[inline]
pub fn is_ptr(self) -> bool {
matches!(self, Pointer)
}
}

/// Inclusive wrap-around range of valid values, that is, if
Expand Down
Loading

0 comments on commit 9d20fd1

Please sign in to comment.