Skip to content

Commit 576176d

Browse files
authored
Rollup merge of #133211 - Strophox:miri-correct-state-update-ffi, r=RalfJung
Extend Miri to correctly pass mutable pointers through FFI Based off of #129684, this PR further extends Miri to execute native calls that make use of pointers to *mutable* memory. We adapt Miri's bookkeeping of internal state upon any FFI call that gives external code permission to mutate memory. Native code may now possibly write and therefore initialize and change the pointer provenance of bytes it has access to: Such memory is assumed to be *initialized* afterwards and bytes are given *arbitrary (wildcard) provenance*. This enables programs that correctly use mutating FFI calls to run Miri without errors, at the cost of possibly missing Undefined Behaviour caused by incorrect usage of mutating FFI. > <details> > > <summary> Simple example </summary> > > ```rust > extern "C" { > fn init_int(ptr: *mut i32); > } > > fn main() { > let mut x = std::mem::MaybeUninit::<i32>::uninit(); > let x = unsafe { > init_int(x.as_mut_ptr()); > x.assume_init() > }; > > println!("C initialized my memory to: {x}"); > } > ``` > ```c > void init_int(int *ptr) { > *ptr = 42; > } > ``` > should now show `C initialized my memory to: 42`. > > </details> r? ``@RalfJung``
2 parents 820ddaf + 712ceab commit 576176d

File tree

19 files changed

+476
-59
lines changed

19 files changed

+476
-59
lines changed

Diff for: compiler/rustc_const_eval/src/const_eval/dummy_machine.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,9 @@ impl<'tcx> interpret::Machine<'tcx> for DummyMachine {
168168
})
169169
}
170170

171-
fn expose_ptr(
172-
_ecx: &mut InterpCx<'tcx, Self>,
173-
_ptr: interpret::Pointer<Self::Provenance>,
171+
fn expose_provenance(
172+
_ecx: &InterpCx<'tcx, Self>,
173+
_provenance: Self::Provenance,
174174
) -> interpret::InterpResult<'tcx> {
175175
unimplemented!()
176176
}

Diff for: compiler/rustc_const_eval/src/const_eval/machine.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,8 @@ use crate::errors::{LongRunning, LongRunningWarn};
2121
use crate::fluent_generated as fluent;
2222
use crate::interpret::{
2323
self, AllocId, AllocRange, ConstAllocation, CtfeProvenance, FnArg, Frame, GlobalAlloc, ImmTy,
24-
InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, RangeSet, Scalar, compile_time_machine,
25-
interp_ok, throw_exhaust, throw_inval, throw_ub, throw_ub_custom, throw_unsup,
26-
throw_unsup_format,
24+
InterpCx, InterpResult, MPlaceTy, OpTy, RangeSet, Scalar, compile_time_machine, interp_ok,
25+
throw_exhaust, throw_inval, throw_ub, throw_ub_custom, throw_unsup, throw_unsup_format,
2726
};
2827

2928
/// When hitting this many interpreted terminators we emit a deny by default lint
@@ -586,7 +585,10 @@ impl<'tcx> interpret::Machine<'tcx> for CompileTimeMachine<'tcx> {
586585
}
587586

588587
#[inline(always)]
589-
fn expose_ptr(_ecx: &mut InterpCx<'tcx, Self>, _ptr: Pointer) -> InterpResult<'tcx> {
588+
fn expose_provenance(
589+
_ecx: &InterpCx<'tcx, Self>,
590+
_provenance: Self::Provenance,
591+
) -> InterpResult<'tcx> {
590592
// This is only reachable with -Zunleash-the-miri-inside-of-you.
591593
throw_unsup_format!("exposing pointers is not possible at compile-time")
592594
}

Diff for: compiler/rustc_const_eval/src/interpret/cast.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
238238
let scalar = src.to_scalar();
239239
let ptr = scalar.to_pointer(self)?;
240240
match ptr.into_pointer_or_addr() {
241-
Ok(ptr) => M::expose_ptr(self, ptr)?,
241+
Ok(ptr) => M::expose_provenance(self, ptr.provenance)?,
242242
Err(_) => {} // Do nothing, exposing an invalid pointer (`None` provenance) is a NOP.
243243
};
244244
interp_ok(ImmTy::from_scalar(

Diff for: compiler/rustc_const_eval/src/interpret/machine.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -327,11 +327,11 @@ pub trait Machine<'tcx>: Sized {
327327
addr: u64,
328328
) -> InterpResult<'tcx, Pointer<Option<Self::Provenance>>>;
329329

330-
/// Marks a pointer as exposed, allowing it's provenance
330+
/// Marks a pointer as exposed, allowing its provenance
331331
/// to be recovered. "Pointer-to-int cast"
332-
fn expose_ptr(
333-
ecx: &mut InterpCx<'tcx, Self>,
334-
ptr: Pointer<Self::Provenance>,
332+
fn expose_provenance(
333+
ecx: &InterpCx<'tcx, Self>,
334+
provenance: Self::Provenance,
335335
) -> InterpResult<'tcx>;
336336

337337
/// Convert a pointer with provenance into an allocation-offset pair and extra provenance info.

Diff for: compiler/rustc_const_eval/src/interpret/memory.rs

+46
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,52 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
944944
interp_ok(())
945945
}
946946

947+
/// Handle the effect an FFI call might have on the state of allocations.
948+
/// This overapproximates the modifications which external code might make to memory:
949+
/// We set all reachable allocations as initialized, mark all provenances as exposed
950+
/// and overwrite them with `Provenance::WILDCARD`.
951+
pub fn prepare_for_native_call(
952+
&mut self,
953+
id: AllocId,
954+
initial_prov: M::Provenance,
955+
) -> InterpResult<'tcx> {
956+
// Expose provenance of the root allocation.
957+
M::expose_provenance(self, initial_prov)?;
958+
959+
let mut done = FxHashSet::default();
960+
let mut todo = vec![id];
961+
while let Some(id) = todo.pop() {
962+
if !done.insert(id) {
963+
// We already saw this allocation before, don't process it again.
964+
continue;
965+
}
966+
let info = self.get_alloc_info(id);
967+
968+
// If there is no data behind this pointer, skip this.
969+
if !matches!(info.kind, AllocKind::LiveData) {
970+
continue;
971+
}
972+
973+
// Expose all provenances in this allocation, and add them to `todo`.
974+
let alloc = self.get_alloc_raw(id)?;
975+
for prov in alloc.provenance().provenances() {
976+
M::expose_provenance(self, prov)?;
977+
if let Some(id) = prov.get_alloc_id() {
978+
todo.push(id);
979+
}
980+
}
981+
982+
// Prepare for possible write from native code if mutable.
983+
if info.mutbl.is_mut() {
984+
self.get_alloc_raw_mut(id)?
985+
.0
986+
.prepare_for_native_write()
987+
.map_err(|e| e.to_interp_error(id))?;
988+
}
989+
}
990+
interp_ok(())
991+
}
992+
947993
/// Create a lazy debug printer that prints the given allocation and all allocations it points
948994
/// to, recursively.
949995
#[must_use]

Diff for: compiler/rustc_middle/src/mir/interpret/allocation.rs

+22
Original file line numberDiff line numberDiff line change
@@ -643,6 +643,28 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
643643
Ok(())
644644
}
645645

646+
/// Initialize all previously uninitialized bytes in the entire allocation, and set
647+
/// provenance of everything to `Wildcard`. Before calling this, make sure all
648+
/// provenance in this allocation is exposed!
649+
pub fn prepare_for_native_write(&mut self) -> AllocResult {
650+
let full_range = AllocRange { start: Size::ZERO, size: Size::from_bytes(self.len()) };
651+
// Overwrite uninitialized bytes with 0, to ensure we don't leak whatever their value happens to be.
652+
for chunk in self.init_mask.range_as_init_chunks(full_range) {
653+
if !chunk.is_init() {
654+
let uninit_bytes = &mut self.bytes
655+
[chunk.range().start.bytes_usize()..chunk.range().end.bytes_usize()];
656+
uninit_bytes.fill(0);
657+
}
658+
}
659+
// Mark everything as initialized now.
660+
self.mark_init(full_range, true);
661+
662+
// Set provenance of all bytes to wildcard.
663+
self.provenance.write_wildcards(self.len());
664+
665+
Ok(())
666+
}
667+
646668
/// Remove all provenance in the given memory range.
647669
pub fn clear_provenance(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
648670
self.provenance.clear(range, cx)?;

Diff for: compiler/rustc_middle/src/mir/interpret/allocation/provenance_map.rs

+19
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,25 @@ impl<Prov: Provenance> ProvenanceMap<Prov> {
195195

196196
Ok(())
197197
}
198+
199+
/// Overwrites all provenance in the allocation with wildcard provenance.
200+
///
201+
/// Provided for usage in Miri and panics otherwise.
202+
pub fn write_wildcards(&mut self, alloc_size: usize) {
203+
assert!(
204+
Prov::OFFSET_IS_ADDR,
205+
"writing wildcard provenance is not supported when `OFFSET_IS_ADDR` is false"
206+
);
207+
let wildcard = Prov::WILDCARD.unwrap();
208+
209+
// Remove all pointer provenances, then write wildcards into the whole byte range.
210+
self.ptrs.clear();
211+
let last = Size::from_bytes(alloc_size);
212+
let bytes = self.bytes.get_or_insert_with(Box::default);
213+
for offset in Size::ZERO..last {
214+
bytes.insert(offset, wildcard);
215+
}
216+
}
198217
}
199218

200219
/// A partial, owned list of provenance to transfer into another allocation.

Diff for: compiler/rustc_middle/src/mir/interpret/pointer.rs

+9
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ pub trait Provenance: Copy + fmt::Debug + 'static {
6666
/// pointer, and implement ptr-to-int transmutation by stripping provenance.
6767
const OFFSET_IS_ADDR: bool;
6868

69+
/// If wildcard provenance is implemented, contains the unique, general wildcard provenance variant.
70+
const WILDCARD: Option<Self>;
71+
6972
/// Determines how a pointer should be printed.
7073
fn fmt(ptr: &Pointer<Self>, f: &mut fmt::Formatter<'_>) -> fmt::Result;
7174

@@ -168,6 +171,9 @@ impl Provenance for CtfeProvenance {
168171
// so ptr-to-int casts are not possible (since we do not know the global physical offset).
169172
const OFFSET_IS_ADDR: bool = false;
170173

174+
// `CtfeProvenance` does not implement wildcard provenance.
175+
const WILDCARD: Option<Self> = None;
176+
171177
fn fmt(ptr: &Pointer<Self>, f: &mut fmt::Formatter<'_>) -> fmt::Result {
172178
// Print AllocId.
173179
fmt::Debug::fmt(&ptr.provenance.alloc_id(), f)?; // propagates `alternate` flag
@@ -197,6 +203,9 @@ impl Provenance for AllocId {
197203
// so ptr-to-int casts are not possible (since we do not know the global physical offset).
198204
const OFFSET_IS_ADDR: bool = false;
199205

206+
// `AllocId` does not implement wildcard provenance.
207+
const WILDCARD: Option<Self> = None;
208+
200209
fn fmt(ptr: &Pointer<Self>, f: &mut fmt::Formatter<'_>) -> fmt::Result {
201210
// Forward `alternate` flag to `alloc_id` printing.
202211
if f.alternate() {

Diff for: src/tools/miri/src/alloc_addresses/mod.rs

+6-4
Original file line numberDiff line numberDiff line change
@@ -286,9 +286,9 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
286286

287287
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
288288
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
289-
fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
290-
let this = self.eval_context_mut();
291-
let global_state = this.machine.alloc_addresses.get_mut();
289+
fn expose_ptr(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
290+
let this = self.eval_context_ref();
291+
let mut global_state = this.machine.alloc_addresses.borrow_mut();
292292
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
293293
if global_state.provenance_mode == ProvenanceMode::Strict {
294294
return interp_ok(());
@@ -299,8 +299,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
299299
return interp_ok(());
300300
}
301301
trace!("Exposing allocation id {alloc_id:?}");
302-
let global_state = this.machine.alloc_addresses.get_mut();
303302
global_state.exposed.insert(alloc_id);
303+
// Release the global state before we call `expose_tag`, which may call `get_alloc_info_extra`,
304+
// which may need access to the global state.
305+
drop(global_state);
304306
if this.machine.borrow_tracker.is_some() {
305307
this.expose_tag(alloc_id, tag)?;
306308
}

Diff for: src/tools/miri/src/borrow_tracker/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -302,8 +302,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
302302
}
303303
}
304304

305-
fn expose_tag(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
306-
let this = self.eval_context_mut();
305+
fn expose_tag(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
306+
let this = self.eval_context_ref();
307307
let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
308308
match method {
309309
BorrowTrackerMethod::StackedBorrows => this.sb_expose_tag(alloc_id, tag),

Diff for: src/tools/miri/src/borrow_tracker/stacked_borrows/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1011,8 +1011,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
10111011
}
10121012

10131013
/// Mark the given tag as exposed. It was found on a pointer with the given AllocId.
1014-
fn sb_expose_tag(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
1015-
let this = self.eval_context_mut();
1014+
fn sb_expose_tag(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
1015+
let this = self.eval_context_ref();
10161016

10171017
// Function pointers and dead objects don't have an alloc_extra so we ignore them.
10181018
// This is okay because accessing them is UB anyway, no need for any Stacked Borrows checks.

Diff for: src/tools/miri/src/borrow_tracker/tree_borrows/mod.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -532,8 +532,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
532532
}
533533

534534
/// Mark the given tag as exposed. It was found on a pointer with the given AllocId.
535-
fn tb_expose_tag(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
536-
let this = self.eval_context_mut();
535+
fn tb_expose_tag(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
536+
let this = self.eval_context_ref();
537537

538538
// Function pointers and dead objects don't have an alloc_extra so we ignore them.
539539
// This is okay because accessing them is UB anyway, no need for any Tree Borrows checks.

Diff for: src/tools/miri/src/machine.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,9 @@ impl interpret::Provenance for Provenance {
270270
/// We use absolute addresses in the `offset` of a `StrictPointer`.
271271
const OFFSET_IS_ADDR: bool = true;
272272

273+
/// Miri implements wildcard provenance.
274+
const WILDCARD: Option<Self> = Some(Provenance::Wildcard);
275+
273276
fn get_alloc_id(self) -> Option<AllocId> {
274277
match self {
275278
Provenance::Concrete { alloc_id, .. } => Some(alloc_id),
@@ -1242,8 +1245,8 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
12421245
/// Called on `ptr as usize` casts.
12431246
/// (Actually computing the resulting `usize` doesn't need machine help,
12441247
/// that's just `Scalar::try_to_int`.)
1245-
fn expose_ptr(ecx: &mut InterpCx<'tcx, Self>, ptr: StrictPointer) -> InterpResult<'tcx> {
1246-
match ptr.provenance {
1248+
fn expose_provenance(ecx: &InterpCx<'tcx, Self>, provenance: Self::Provenance) -> InterpResult<'tcx> {
1249+
match provenance {
12471250
Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag),
12481251
Provenance::Wildcard => {
12491252
// No need to do anything for wildcard pointers as

Diff for: src/tools/miri/src/shims/native_lib.rs

+34-16
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@ use std::ops::Deref;
33

44
use libffi::high::call as ffi;
55
use libffi::low::CodePtr;
6-
use rustc_abi::{BackendRepr, HasDataLayout};
7-
use rustc_middle::ty::{self as ty, IntTy, UintTy};
6+
use rustc_abi::{BackendRepr, HasDataLayout, Size};
7+
use rustc_middle::{
8+
mir::interpret::Pointer,
9+
ty::{self as ty, IntTy, UintTy},
10+
};
811
use rustc_span::Symbol;
912

1013
use crate::*;
@@ -75,6 +78,11 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
7578
unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) };
7679
return interp_ok(ImmTy::uninit(dest.layout));
7780
}
81+
ty::RawPtr(..) => {
82+
let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) };
83+
let ptr = Pointer::new(Provenance::Wildcard, Size::from_bytes(x.addr()));
84+
Scalar::from_pointer(ptr, this)
85+
}
7886
_ => throw_unsup_format!("unsupported return type for native call: {:?}", link_name),
7987
};
8088
interp_ok(ImmTy::from_scalar(scalar, dest.layout))
@@ -152,8 +160,26 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
152160
if !matches!(arg.layout.backend_repr, BackendRepr::Scalar(_)) {
153161
throw_unsup_format!("only scalar argument types are support for native calls")
154162
}
155-
libffi_args.push(imm_to_carg(this.read_immediate(arg)?, this)?);
163+
let imm = this.read_immediate(arg)?;
164+
libffi_args.push(imm_to_carg(&imm, this)?);
165+
// If we are passing a pointer, prepare the memory it points to.
166+
if matches!(arg.layout.ty.kind(), ty::RawPtr(..)) {
167+
let ptr = imm.to_scalar().to_pointer(this)?;
168+
let Some(prov) = ptr.provenance else {
169+
// Pointer without provenance may not access any memory.
170+
continue;
171+
};
172+
// We use `get_alloc_id` for its best-effort behaviour with Wildcard provenance.
173+
let Some(alloc_id) = prov.get_alloc_id() else {
174+
// Wildcard pointer, whatever it points to must be already exposed.
175+
continue;
176+
};
177+
this.prepare_for_native_call(alloc_id, prov)?;
178+
}
156179
}
180+
181+
// FIXME: In the future, we should also call `prepare_for_native_call` on all previously
182+
// exposed allocations, since C may access any of them.
157183

158184
// Convert them to `libffi::high::Arg` type.
159185
let libffi_args = libffi_args
@@ -220,7 +246,7 @@ impl<'a> CArg {
220246

221247
/// Extract the scalar value from the result of reading a scalar from the machine,
222248
/// and convert it to a `CArg`.
223-
fn imm_to_carg<'tcx>(v: ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'tcx, CArg> {
249+
fn imm_to_carg<'tcx>(v: &ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'tcx, CArg> {
224250
interp_ok(match v.layout.ty.kind() {
225251
// If the primitive provided can be converted to a type matching the type pattern
226252
// then create a `CArg` of this primitive value with the corresponding `CArg` constructor.
@@ -238,18 +264,10 @@ fn imm_to_carg<'tcx>(v: ImmTy<'tcx>, cx: &impl HasDataLayout) -> InterpResult<'t
238264
ty::Uint(UintTy::U64) => CArg::UInt64(v.to_scalar().to_u64()?),
239265
ty::Uint(UintTy::Usize) =>
240266
CArg::USize(v.to_scalar().to_target_usize(cx)?.try_into().unwrap()),
241-
ty::RawPtr(_, mutability) => {
242-
// Arbitrary mutable pointer accesses are not currently supported in Miri.
243-
if mutability.is_mut() {
244-
throw_unsup_format!(
245-
"unsupported mutable pointer type for native call: {}",
246-
v.layout.ty
247-
);
248-
} else {
249-
let s = v.to_scalar().to_pointer(cx)?.addr();
250-
// This relies on the `expose_provenance` in `addr_from_alloc_id`.
251-
CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
252-
}
267+
ty::RawPtr(..) => {
268+
let s = v.to_scalar().to_pointer(cx)?.addr();
269+
// This relies on the `expose_provenance` in `addr_from_alloc_id`.
270+
CArg::RawPtr(std::ptr::with_exposed_provenance_mut(s.bytes_usize()))
253271
}
254272
_ => throw_unsup_format!("unsupported argument type for native call: {}", v.layout.ty),
255273
})

0 commit comments

Comments
 (0)