Skip to content

Commit 21b8f06

Browse files
committed
interpret/allocation: fix aliasing issue in interpreter and refactor getters a bit
- rename mutating functions to be more scary - add a new raw bytes getter
1 parent ee03c28 commit 21b8f06

File tree

4 files changed

+55
-22
lines changed

4 files changed

+55
-22
lines changed

compiler/rustc_const_eval/src/interpret/machine.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -555,7 +555,7 @@ pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
555555

556556
type AllocExtra = ();
557557
type FrameExtra = ();
558-
type Bytes = Box<[u8]>;
558+
type Bytes = Vec<u8>;
559559

560560
#[inline(always)]
561561
fn ignore_optional_overflow_checks(_ecx: &InterpCx<$mir, $tcx, Self>) -> bool {

compiler/rustc_const_eval/src/interpret/memory.rs

+16-5
Original file line numberDiff line numberDiff line change
@@ -123,15 +123,15 @@ pub struct Memory<'mir, 'tcx, M: Machine<'mir, 'tcx>> {
123123
/// A reference to some allocation that was already bounds-checked for the given region
124124
/// and had the on-access machine hooks run.
125125
#[derive(Copy, Clone)]
126-
pub struct AllocRef<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Box<[u8]>> {
126+
pub struct AllocRef<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Vec<u8>> {
127127
alloc: &'a Allocation<Prov, Extra, Bytes>,
128128
range: AllocRange,
129129
tcx: TyCtxt<'tcx>,
130130
alloc_id: AllocId,
131131
}
132132
/// A reference to some allocation that was already bounds-checked for the given region
133133
/// and had the on-access machine hooks run.
134-
pub struct AllocRefMut<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Box<[u8]>> {
134+
pub struct AllocRefMut<'a, 'tcx, Prov: Provenance, Extra, Bytes: AllocBytes = Vec<u8>> {
135135
alloc: &'a mut Allocation<Prov, Extra, Bytes>,
136136
range: AllocRange,
137137
tcx: TyCtxt<'tcx>,
@@ -1157,11 +1157,11 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
11571157
};
11581158

11591159
// Side-step AllocRef and directly access the underlying bytes more efficiently.
1160-
// (We are staying inside the bounds here so all is good.)
1160+
// (We are staying inside the bounds here and all bytes do get overwritten so all is good.)
11611161
let alloc_id = alloc_ref.alloc_id;
11621162
let bytes = alloc_ref
11631163
.alloc
1164-
.get_bytes_mut(&alloc_ref.tcx, alloc_ref.range)
1164+
.get_bytes_unchecked_for_overwrite(&alloc_ref.tcx, alloc_ref.range)
11651165
.map_err(move |e| e.to_interp_error(alloc_id))?;
11661166
// `zip` would stop when the first iterator ends; we want to definitely
11671167
// cover all of `bytes`.
@@ -1182,6 +1182,11 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
11821182
self.mem_copy_repeatedly(src, dest, size, 1, nonoverlapping)
11831183
}
11841184

1185+
/// Performs `num_copies` many copies of `size` many bytes from `src` to `dest + i*size` (where
1186+
/// `i` is the index of the copy).
1187+
///
1188+
/// Either `nonoverlapping` must be true or `num_copies` must be 1; doing repeated copies that
1189+
/// may overlap is not supported.
11851190
pub fn mem_copy_repeatedly(
11861191
&mut self,
11871192
src: Pointer<Option<M::Provenance>>,
@@ -1243,8 +1248,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
12431248
(dest_alloc_id, dest_prov),
12441249
dest_range,
12451250
)?;
1251+
// Yes we do overwrite all bytes in `dest_bytes`.
12461252
let dest_bytes = dest_alloc
1247-
.get_bytes_mut_ptr(&tcx, dest_range)
1253+
.get_bytes_unchecked_for_overwrite_ptr(&tcx, dest_range)
12481254
.map_err(|e| e.to_interp_error(dest_alloc_id))?
12491255
.as_mut_ptr();
12501256

@@ -1278,6 +1284,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
12781284
}
12791285
}
12801286
}
1287+
if num_copies > 1 {
1288+
assert!(nonoverlapping, "multi-copy only supported in non-overlapping mode");
1289+
}
12811290

12821291
let size_in_bytes = size.bytes_usize();
12831292
// For particularly large arrays (where this is perf-sensitive) it's common that
@@ -1290,6 +1299,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
12901299
} else if src_alloc_id == dest_alloc_id {
12911300
let mut dest_ptr = dest_bytes;
12921301
for _ in 0..num_copies {
1302+
// Here we rely on `src` and `dest` being non-overlapping if there is more than
1303+
// one copy.
12931304
ptr::copy(src_bytes, dest_ptr, size_in_bytes);
12941305
dest_ptr = dest_ptr.add(size_in_bytes);
12951306
}

compiler/rustc_middle/src/mir/interpret/allocation.rs

+37-15
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,33 @@ pub trait AllocBytes:
3737
/// Create a zeroed `AllocBytes` of the specified size and alignment.
3838
/// Returns `None` if we ran out of memory on the host.
3939
fn zeroed(size: Size, _align: Align) -> Option<Self>;
40+
41+
/// Gives direct access to the raw underlying storage.
42+
///
43+
/// Crucially this pointer is compatible with:
44+
/// - other pointers returned by this method, and
45+
/// - references returned from `deref()`, as long as there was no write.
46+
fn as_mut_ptr(&mut self) -> *mut u8;
4047
}
4148

42-
// Default `bytes` for `Allocation` is a `Box<[u8]>`.
43-
impl AllocBytes for Box<[u8]> {
49+
/// Default `bytes` for `Allocation` is a `Vec<u8>`.
50+
///
51+
/// We use `Vec`, not `Box`, since we need `Vec::as_mut_ptr` and how it interacts with other
52+
/// pointers to the backing buffer. `Box` has no corresponding method.
53+
impl AllocBytes for Vec<u8> {
4454
fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align) -> Self {
45-
Box::<[u8]>::from(slice.into())
55+
slice.into().into_owned()
4656
}
4757

4858
fn zeroed(size: Size, _align: Align) -> Option<Self> {
4959
let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize()).ok()?;
5060
// SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]>
5161
let bytes = unsafe { bytes.assume_init() };
52-
Some(bytes)
62+
Some(bytes.into())
63+
}
64+
65+
fn as_mut_ptr(&mut self) -> *mut u8 {
66+
Vec::as_mut_ptr(self)
5367
}
5468
}
5569

@@ -62,7 +76,7 @@ impl AllocBytes for Box<[u8]> {
6276
// hashed. (see the `Hash` impl below for more details), so the impl is not derived.
6377
#[derive(Clone, Eq, PartialEq, TyEncodable, TyDecodable)]
6478
#[derive(HashStable)]
65-
pub struct Allocation<Prov: Provenance = CtfeProvenance, Extra = (), Bytes = Box<[u8]>> {
79+
pub struct Allocation<Prov: Provenance = CtfeProvenance, Extra = (), Bytes = Vec<u8>> {
6680
/// The actual bytes of the allocation.
6781
/// Note that the bytes of a pointer represent the offset of the pointer.
6882
bytes: Bytes,
@@ -399,10 +413,6 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
399413

400414
/// Byte accessors.
401415
impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes> {
402-
pub fn base_addr(&self) -> *const u8 {
403-
self.bytes.as_ptr()
404-
}
405-
406416
/// This is the entirely abstraction-violating way to just grab the raw bytes without
407417
/// caring about provenance or initialization.
408418
///
@@ -452,13 +462,14 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
452462
Ok(self.get_bytes_unchecked(range))
453463
}
454464

455-
/// Just calling this already marks everything as defined and removes provenance,
456-
/// so be sure to actually put data there!
465+
/// This is the entirely abstraction-violating way to just get mutable access to the raw bytes.
466+
/// Just calling this already marks everything as defined and removes provenance, so be sure to
467+
/// actually overwrite all the data there!
457468
///
458469
/// It is the caller's responsibility to check bounds and alignment beforehand.
459470
/// Most likely, you want to use the `PlaceTy` and `OperandTy`-based methods
460471
/// on `InterpCx` instead.
461-
pub fn get_bytes_mut(
472+
pub fn get_bytes_unchecked_for_overwrite(
462473
&mut self,
463474
cx: &impl HasDataLayout,
464475
range: AllocRange,
@@ -469,8 +480,9 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
469480
Ok(&mut self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
470481
}
471482

472-
/// A raw pointer variant of `get_bytes_mut` that avoids invalidating existing aliases into this memory.
473-
pub fn get_bytes_mut_ptr(
483+
/// A raw pointer variant of `get_bytes_unchecked_for_overwrite` that avoids invalidating existing immutable aliases
484+
/// into this memory.
485+
pub fn get_bytes_unchecked_for_overwrite_ptr(
474486
&mut self,
475487
cx: &impl HasDataLayout,
476488
range: AllocRange,
@@ -479,10 +491,19 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
479491
self.provenance.clear(range, cx)?;
480492

481493
assert!(range.end().bytes_usize() <= self.bytes.len()); // need to do our own bounds-check
494+
// Crucially, we go via `AllocBytes::as_mut_ptr`, not `AllocBytes::deref_mut`.
482495
let begin_ptr = self.bytes.as_mut_ptr().wrapping_add(range.start.bytes_usize());
483496
let len = range.end().bytes_usize() - range.start.bytes_usize();
484497
Ok(ptr::slice_from_raw_parts_mut(begin_ptr, len))
485498
}
499+
500+
/// This gives direct mutable access to the entire buffer, just exposing its internal state
501+
/// without resetting anything. Directly exposes `AllocBytes::as_mut_ptr`. Only works if
502+
/// `OFFSET_IS_ADDR` is true.
503+
pub fn get_bytes_unchecked_raw_mut(&mut self) -> *mut u8 {
504+
assert!(Prov::OFFSET_IS_ADDR);
505+
self.bytes.as_mut_ptr()
506+
}
486507
}
487508

488509
/// Reading and writing.
@@ -589,7 +610,8 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
589610
};
590611

591612
let endian = cx.data_layout().endian;
592-
let dst = self.get_bytes_mut(cx, range)?;
613+
// Yes we do overwrite all the bytes in `dst`.
614+
let dst = self.get_bytes_unchecked_for_overwrite(cx, range)?;
593615
write_target_uint(endian, dst, bytes).unwrap();
594616

595617
// See if we have to also store some provenance.

src/tools/miri/src/machine.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
861861

862862
type Provenance = Provenance;
863863
type ProvenanceExtra = ProvenanceExtra;
864-
type Bytes = Box<[u8]>;
864+
type Bytes = Vec<u8>;
865865

866866
type MemoryMap = MonoHashMap<
867867
AllocId,

0 commit comments

Comments
 (0)