Skip to content

Commit

Permalink
Auto merge of rust-lang#54461 - RalfJung:pointer-provenance, r=oli-obk
Browse files Browse the repository at this point in the history
miri engine: basic support for pointer provenance tracking

This enriches pointers with a new member, `tag`, that can be used to do provenance tracking. This is a new type parameter that propagates up through everything. It defaults to `()` (no tag), which is also the value used by CTFE -- but miri will use another type.

The only actually interesting piece here, I think, is what I had to do in the memory's `get`. The problem is that `tcx` (storing the allocations for statics) uses `()` for provenance information. But the machine might need another tag. The machine has a function to do the conversion, but if a conversion actually happened, we need to store the result of this *somewhere* -- we cannot return a pointer into `tcx` as we usually would.
So I introduced `MonoHashMap` which uses `RefCell` to be able to insert new entries even when we just have a shared ref. However, it is important that we can also return shared refs into the map without holding the `RefCell` open. This is achieved by boxing the values stored in the map, so their addresses remain stable even when the map's table gets reallocated. This is all implemented in `mono_hash_map.rs`.

NOTE: This PR also contains the commits from rust-lang#54380 (comment). Only the [last two commits](https://github.com/rust-lang/rust/pull/54461/files/8e74ee0998a5b11f28d61600dbb881c7168a4a40..HEAD) are new.
  • Loading branch information
bors committed Oct 10, 2018
2 parents 71d3a71 + bc9435d commit 2243fab
Show file tree
Hide file tree
Showing 19 changed files with 990 additions and 547 deletions.
56 changes: 33 additions & 23 deletions src/librustc/ich/impls_ty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,10 +391,39 @@ for ::mir::interpret::ConstValue<'gcx> {
}
}

impl_stable_hash_for!(struct mir::interpret::Pointer {
alloc_id,
offset
});
/// Stable hashing for a `Pointer<Tag>`; only available when the tag type is
/// itself `HashStable` under the same hashing context.
impl<'a, Tag> HashStable<StableHashingContext<'a>>
for ::mir::interpret::Pointer<Tag>
where Tag: HashStable<StableHashingContext<'a>>
{
/// Feeds `alloc_id`, `offset` and `tag` into `hasher`, in that order.
fn hash_stable<W: StableHasherResult>(&self,
hcx: &mut StableHashingContext<'a>,
hasher: &mut StableHasher<W>) {
// The pattern names every field and has no `..`, so a field added to
// `Pointer` makes this line fail to compile until it is handled here.
let ::mir::interpret::Pointer { alloc_id, offset, tag } = self;
alloc_id.hash_stable(hcx, hasher);
offset.hash_stable(hcx, hasher);
tag.hash_stable(hcx, hasher);
}
}

/// Stable hashing for a `Scalar<Tag>`; only available when the tag type is
/// itself `HashStable` under the same hashing context.
impl<'a, Tag> HashStable<StableHashingContext<'a>>
for ::mir::interpret::Scalar<Tag>
where Tag: HashStable<StableHashingContext<'a>>
{
/// Hashes the variant discriminant first, then the payload of that variant.
fn hash_stable<W: StableHasherResult>(&self,
hcx: &mut StableHashingContext<'a>,
hasher: &mut StableHasher<W>) {
use mir::interpret::Scalar::*;

// The discriminant goes in before any payload data.
mem::discriminant(self).hash_stable(hcx, hasher);
match self {
// Raw bits: hash the value, then its size.
Bits { bits, size } => {
bits.hash_stable(hcx, hasher);
size.hash_stable(hcx, hasher);
},
// Pointer: delegate to the `HashStable` impl of the pointer itself.
Ptr(ptr) => ptr.hash_stable(hcx, hasher),
}
}
}

impl<'a> HashStable<StableHashingContext<'a>> for mir::interpret::AllocId {
fn hash_stable<W: StableHasherResult>(
Expand Down Expand Up @@ -449,25 +478,6 @@ impl_stable_hash_for!(enum ::syntax::ast::Mutability {
Mutable
});


/// Stable hashing for a `Scalar` with its default type parameters.
impl<'a> HashStable<StableHashingContext<'a>>
for ::mir::interpret::Scalar {
/// Hashes the variant discriminant first, then the payload of that variant.
fn hash_stable<W: StableHasherResult>(&self,
hcx: &mut StableHashingContext<'a>,
hasher: &mut StableHasher<W>) {
use mir::interpret::Scalar::*;

// The discriminant goes in before any payload data.
mem::discriminant(self).hash_stable(hcx, hasher);
match *self {
// Raw bits: hash the value, then its size.
Bits { bits, size } => {
bits.hash_stable(hcx, hasher);
size.hash_stable(hcx, hasher);
},
// Pointer: delegate to the `HashStable` impl of the pointer itself.
Ptr(ptr) => ptr.hash_stable(hcx, hasher),
}
}
}

impl_stable_hash_for!(struct ty::Const<'tcx> {
ty,
val
Expand Down
64 changes: 46 additions & 18 deletions src/librustc/mir/interpret/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,54 +138,82 @@ impl<T: layout::HasDataLayout> PointerArithmetic for T {}
/// each context.
///
/// Defaults to the index based and loosely coupled AllocId.
///
/// Pointer is also generic over the `Tag` associated with each pointer,
/// which is used to do provenance tracking during execution.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, RustcEncodable, RustcDecodable, Hash)]
pub struct Pointer<Id=AllocId> {
pub struct Pointer<Tag=(),Id=AllocId> {
pub alloc_id: Id,
pub offset: Size,
pub tag: Tag,
}

/// Builds a `Pointer` to the very start (offset zero) of the given allocation.
impl From<AllocId> for Pointer {
    #[inline(always)]
    fn from(alloc_id: AllocId) -> Self {
        // A pointer to the beginning of an allocation has a zero offset.
        let start = Size::ZERO;
        Pointer::new(alloc_id, start)
    }
}

impl<'tcx> Pointer {
impl<'tcx> Pointer<()> {
#[inline(always)]
pub fn new(alloc_id: AllocId, offset: Size) -> Self {
Pointer { alloc_id, offset }
Pointer { alloc_id, offset, tag: () }
}

#[inline(always)]
pub fn with_default_tag<Tag>(self) -> Pointer<Tag>
where Tag: Default
{
Pointer::new_with_tag(self.alloc_id, self.offset, Default::default())
}
}

impl<'tcx, Tag> Pointer<Tag> {
#[inline(always)]
pub fn new_with_tag(alloc_id: AllocId, offset: Size, tag: Tag) -> Self {
Pointer { alloc_id, offset, tag }
}

pub fn wrapping_signed_offset<C: HasDataLayout>(self, i: i64, cx: C) -> Self {
Pointer::new(
Pointer::new_with_tag(
self.alloc_id,
Size::from_bytes(cx.data_layout().wrapping_signed_offset(self.offset.bytes(), i)),
self.tag,
)
}

pub fn overflowing_signed_offset<C: HasDataLayout>(self, i: i128, cx: C) -> (Self, bool) {
let (res, over) = cx.data_layout().overflowing_signed_offset(self.offset.bytes(), i);
(Pointer::new(self.alloc_id, Size::from_bytes(res)), over)
(Pointer::new_with_tag(self.alloc_id, Size::from_bytes(res), self.tag), over)
}

pub fn signed_offset<C: HasDataLayout>(self, i: i64, cx: C) -> EvalResult<'tcx, Self> {
Ok(Pointer::new(
Ok(Pointer::new_with_tag(
self.alloc_id,
Size::from_bytes(cx.data_layout().signed_offset(self.offset.bytes(), i)?),
self.tag,
))
}

pub fn overflowing_offset<C: HasDataLayout>(self, i: Size, cx: C) -> (Self, bool) {
let (res, over) = cx.data_layout().overflowing_offset(self.offset.bytes(), i.bytes());
(Pointer::new(self.alloc_id, Size::from_bytes(res)), over)
(Pointer::new_with_tag(self.alloc_id, Size::from_bytes(res), self.tag), over)
}

pub fn offset<C: HasDataLayout>(self, i: Size, cx: C) -> EvalResult<'tcx, Self> {
Ok(Pointer::new(
Ok(Pointer::new_with_tag(
self.alloc_id,
Size::from_bytes(cx.data_layout().offset(self.offset.bytes(), i.bytes())?),
self.tag
))
}

#[inline]
pub fn erase_tag(self) -> Pointer {
Pointer { alloc_id: self.alloc_id, offset: self.offset, tag: () }
}
}


Expand Down Expand Up @@ -496,15 +524,15 @@ impl<'tcx, M: fmt::Debug + Eq + Hash + Clone> AllocMap<'tcx, M> {
}

#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, RustcEncodable, RustcDecodable)]
pub struct Allocation {
pub struct Allocation<Tag=()> {
/// The actual bytes of the allocation.
/// Note that the bytes of a pointer represent the offset of the pointer
pub bytes: Vec<u8>,
/// Maps from byte addresses to allocations.
/// Maps from byte addresses to extra data for each pointer.
/// Only the first byte of a pointer is inserted into the map; i.e.,
/// every entry in this map applies to `pointer_size` consecutive bytes starting
/// at the given offset.
pub relocations: Relocations,
pub relocations: Relocations<Tag>,
/// Denotes undefined memory. Reading from undefined memory is forbidden in miri
pub undef_mask: UndefMask,
/// The alignment of the allocation to detect unaligned reads.
Expand All @@ -515,7 +543,7 @@ pub struct Allocation {
pub mutability: Mutability,
}

impl Allocation {
impl<Tag> Allocation<Tag> {
/// Creates a read-only allocation initialized by the given bytes
pub fn from_bytes(slice: &[u8], align: Align) -> Self {
let mut undef_mask = UndefMask::new(Size::ZERO);
Expand Down Expand Up @@ -548,29 +576,29 @@ impl Allocation {
impl<'tcx> ::serialize::UseSpecializedDecodable for &'tcx Allocation {}

#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, RustcEncodable, RustcDecodable)]
pub struct Relocations<Id=AllocId>(SortedMap<Size, Id>);
pub struct Relocations<Tag=(), Id=AllocId>(SortedMap<Size, (Tag, Id)>);

impl<Id> Relocations<Id> {
impl<Tag, Id> Relocations<Tag, Id> {
pub fn new() -> Self {
Relocations(SortedMap::new())
}

// The caller must guarantee that the given relocations are already sorted
// by address and contain no duplicates.
pub fn from_presorted(r: Vec<(Size, Id)>) -> Self {
pub fn from_presorted(r: Vec<(Size, (Tag, Id))>) -> Self {
Relocations(SortedMap::from_presorted_elements(r))
}
}

impl Deref for Relocations {
type Target = SortedMap<Size, AllocId>;
impl<Tag> Deref for Relocations<Tag> {
type Target = SortedMap<Size, (Tag, AllocId)>;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl DerefMut for Relocations {
impl<Tag> DerefMut for Relocations<Tag> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
Expand Down
68 changes: 44 additions & 24 deletions src/librustc/mir/interpret/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,47 @@ impl<'tcx> ConstValue<'tcx> {
}
}

impl<'tcx> Scalar {
// NOTE(review): the text below says "up to 8 bytes", but `bits` is a `u128`
// (16 bytes) -- confirm whether the stated size limit is stale.
/// A `Scalar` represents an immediate, primitive value existing outside of a
/// `memory::Allocation`. It is in many ways like a small chunk of an `Allocation`, up to 8 bytes in
/// size. Like a range of bytes in an `Allocation`, a `Scalar` can either represent the raw bytes
/// of a simple value or a pointer into another `Allocation`
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, RustcEncodable, RustcDecodable, Hash)]
pub enum Scalar<Tag=(), Id=AllocId> {
/// The raw bytes of a simple value.
Bits {
/// The first `size` bytes are the value.
/// Do not try to read fewer or more bytes than that. The remaining bytes must be 0.
size: u8,
bits: u128,
},

/// A pointer into an `Allocation`. An `Allocation` in the `memory` module has a list of
/// relocations, but a `Scalar` is only large enough to contain one, so we just represent the
/// relocation and its associated offset together as a `Pointer` here.
Ptr(Pointer<Tag, Id>),
}

impl<'tcx> Scalar<()> {
    /// Converts an untagged scalar into a `Scalar<Tag>`.
    ///
    /// A pointer is re-tagged through `Pointer::with_default_tag`; raw bits are
    /// carried over unchanged.
    #[inline]
    pub fn with_default_tag<Tag>(self) -> Scalar<Tag>
        where Tag: Default
    {
        match self {
            // Raw bits contain no pointer, so only the type parameter changes.
            Scalar::Bits { bits, size } => Scalar::Bits { bits, size },
            Scalar::Ptr(untagged) => {
                let tagged = untagged.with_default_tag();
                Scalar::Ptr(tagged)
            }
        }
    }
}

impl<'tcx, Tag> Scalar<Tag> {
#[inline]
pub fn erase_tag(self) -> Scalar {
match self {
Scalar::Ptr(ptr) => Scalar::Ptr(ptr.erase_tag()),
Scalar::Bits { bits, size } => Scalar::Bits { bits, size },
}
}

#[inline]
pub fn ptr_null(cx: impl HasDataLayout) -> Self {
Scalar::Bits {
Expand Down Expand Up @@ -208,7 +248,7 @@ impl<'tcx> Scalar {
}

#[inline]
pub fn to_ptr(self) -> EvalResult<'tcx, Pointer> {
pub fn to_ptr(self) -> EvalResult<'tcx, Pointer<Tag>> {
match self {
Scalar::Bits { bits: 0, .. } => err!(InvalidNullPointerUsage),
Scalar::Bits { .. } => err!(ReadBytesAsPointer),
Expand Down Expand Up @@ -317,29 +357,9 @@ impl<'tcx> Scalar {
}
}

impl From<Pointer> for Scalar {
impl<Tag> From<Pointer<Tag>> for Scalar<Tag> {
#[inline(always)]
fn from(ptr: Pointer) -> Self {
fn from(ptr: Pointer<Tag>) -> Self {
Scalar::Ptr(ptr)
}
}

// NOTE(review): the text below says "up to 8 bytes", but `bits` is a `u128`
// (16 bytes) -- confirm whether the stated size limit is stale.
/// A `Scalar` represents an immediate, primitive value existing outside of a
/// `memory::Allocation`. It is in many ways like a small chunk of an `Allocation`, up to 8 bytes in
/// size. Like a range of bytes in an `Allocation`, a `Scalar` can either represent the raw bytes
/// of a simple value or a pointer into another `Allocation`
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, RustcEncodable, RustcDecodable, Hash)]
pub enum Scalar<Id=AllocId> {
/// The raw bytes of a simple value.
Bits {
/// The first `size` bytes are the value.
/// Do not try to read fewer or more bytes than that. The remaining bytes must be 0.
size: u8,
bits: u128,
},

/// A pointer into an `Allocation`. An `Allocation` in the `memory` module has a list of
/// relocations, but a `Scalar` is only large enough to contain one, so we just represent the
/// relocation and its associated offset together as a `Pointer` here.
Ptr(Pointer<Id>),
}
4 changes: 2 additions & 2 deletions src/librustc_codegen_llvm/mir/constant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll
let pointer_size = layout.pointer_size.bytes() as usize;

let mut next_offset = 0;
for &(offset, alloc_id) in alloc.relocations.iter() {
for &(offset, ((), alloc_id)) in alloc.relocations.iter() {
let offset = offset.bytes();
assert_eq!(offset as usize as u64, offset);
let offset = offset as usize;
Expand All @@ -105,7 +105,7 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx<'ll, '_>, alloc: &Allocation) -> &'ll
).expect("const_alloc_to_llvm: could not read relocation pointer") as u64;
llvals.push(scalar_to_llvm(
cx,
Pointer { alloc_id, offset: Size::from_bytes(ptr_offset) }.into(),
Pointer::new(alloc_id, Size::from_bytes(ptr_offset)).into(),
&layout::Scalar {
value: layout::Primitive::Pointer,
valid_range: 0..=!0
Expand Down
Loading

0 comments on commit 2243fab

Please sign in to comment.