Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

miri: treat non-memory local variables properly for data race detection #129828

Merged
merged 2 commits into from
Sep 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -540,10 +540,29 @@ pub trait Machine<'tcx>: Sized {
Ok(ReturnAction::Normal)
}

/// Called immediately after an "immediate" local variable is read
/// (i.e., this is called for reads that do not end up accessing addressable memory).
#[inline(always)]
fn after_local_read(_ecx: &InterpCx<'tcx, Self>, _local: mir::Local) -> InterpResult<'tcx> {
Ok(())
}

/// Called immediately after an "immediate" local variable is assigned a new value
/// (i.e., this is called for writes that do not end up in memory).
/// `storage_live` indicates whether this is the initial write upon `StorageLive`.
#[inline(always)]
fn after_local_write(
_ecx: &mut InterpCx<'tcx, Self>,
_local: mir::Local,
_storage_live: bool,
) -> InterpResult<'tcx> {
Ok(())
}

/// Called immediately after actual memory was allocated for a local
/// but before the local's stack frame is updated to point to that memory.
#[inline(always)]
fn after_local_allocated(
fn after_local_moved_to_memory(
_ecx: &mut InterpCx<'tcx, Self>,
_local: mir::Local,
_mplace: &MPlaceTy<'tcx, Self::Provenance>,
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,10 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
);
res
}

pub(super) fn validation_in_progress(&self) -> bool {
self.memory.validation_in_progress
}
}

#[doc(hidden)]
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_const_eval/src/interpret/operand.rs
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
if matches!(op, Operand::Immediate(_)) {
assert!(!layout.is_unsized());
}
M::after_local_read(self, local)?;
Ok(OpTy { op, layout })
}

Expand Down
31 changes: 22 additions & 9 deletions compiler/rustc_const_eval/src/interpret/place.rs
Original file line number Diff line number Diff line change
Expand Up @@ -504,15 +504,13 @@ where
&self,
local: mir::Local,
) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> {
// Other parts of the system rely on `Place::Local` never being unsized.
// So we eagerly check here if this local has an MPlace, and if yes we use it.
let frame = self.frame();
let layout = self.layout_of_local(frame, local, None)?;
let place = if layout.is_sized() {
// We can just always use the `Local` for sized values.
Place::Local { local, offset: None, locals_addr: frame.locals_addr() }
} else {
// Unsized `Local` isn't okay (we cannot store the metadata).
// Other parts of the system rely on `Place::Local` never being unsized.
match frame.locals[local].access()? {
Operand::Immediate(_) => bug!(),
Operand::Indirect(mplace) => Place::Ptr(*mplace),
Expand Down Expand Up @@ -565,7 +563,10 @@ where
place: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<
'tcx,
Either<MPlaceTy<'tcx, M::Provenance>, (&mut Immediate<M::Provenance>, TyAndLayout<'tcx>)>,
Either<
MPlaceTy<'tcx, M::Provenance>,
(&mut Immediate<M::Provenance>, TyAndLayout<'tcx>, mir::Local),
>,
> {
Ok(match place.to_place().as_mplace_or_local() {
Left(mplace) => Left(mplace),
Expand All @@ -584,7 +585,7 @@ where
}
Operand::Immediate(local_val) => {
// The local still has the optimized representation.
Right((local_val, layout))
Right((local_val, layout, local))
}
}
}
Expand Down Expand Up @@ -646,9 +647,13 @@ where
assert!(dest.layout().is_sized(), "Cannot write unsized immediate data");

match self.as_mplace_or_mutable_local(&dest.to_place())? {
Right((local_val, local_layout)) => {
Right((local_val, local_layout, local)) => {
// Local can be updated in-place.
*local_val = src;
// Call the machine hook (the data race detector needs to know about this write).
if !self.validation_in_progress() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these checks for valiation_in_progress() just an optimization, or are they required for correctness?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This matches the equivalent logic in memory.rs. There it is definitely required; we have some "administrative reads" that we don't want to show up in the aliasing model (specifically, the ones added in #128942).

Here I am not sure if it is required, but it would seem odd to not make this consistent with the logic in memory.rs.

M::after_local_write(self, local, /*storage_live*/ false)?;
}
// Double-check that the value we are storing and the local fit to each other.
if cfg!(debug_assertions) {
src.assert_matches_abi(local_layout.abi, self);
Expand Down Expand Up @@ -717,8 +722,12 @@ where
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
match self.as_mplace_or_mutable_local(&dest.to_place())? {
Right((local_val, _local_layout)) => {
Right((local_val, _local_layout, local)) => {
*local_val = Immediate::Uninit;
// Call the machine hook (the data race detector needs to know about this write).
if !self.validation_in_progress() {
M::after_local_write(self, local, /*storage_live*/ false)?;
}
}
Left(mplace) => {
let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
Expand All @@ -737,8 +746,12 @@ where
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
match self.as_mplace_or_mutable_local(&dest.to_place())? {
Right((local_val, _local_layout)) => {
Right((local_val, _local_layout, local)) => {
local_val.clear_provenance()?;
// Call the machine hook (the data race detector needs to know about this write).
if !self.validation_in_progress() {
M::after_local_write(self, local, /*storage_live*/ false)?;
}
}
Left(mplace) => {
let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
Expand Down Expand Up @@ -944,7 +957,7 @@ where
mplace.mplace,
)?;
}
M::after_local_allocated(self, local, &mplace)?;
M::after_local_moved_to_memory(self, local, &mplace)?;
// Now we can call `access_mut` again, asserting it goes well, and actually
// overwrite things. This points to the entire allocation, not just the part
// the place refers to, i.e. we do this before we apply `offset`.
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_const_eval/src/interpret/stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,8 +534,11 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
let dest_place = self.allocate_dyn(layout, MemoryKind::Stack, meta)?;
Operand::Indirect(*dest_place.mplace())
} else {
assert!(!meta.has_meta()); // we're dropping the metadata
// Just make this an efficient immediate.
assert!(!meta.has_meta()); // we're dropping the metadata
// Make sure the machine knows this "write" is happening. (This is important so that
// races involving local variable allocation can be detected by Miri.)
M::after_local_write(self, local, /*storage_live*/ true)?;
// Note that not calling `layout_of` here does have one real consequence:
// if the type is too big, we'll only notice this when the local is actually initialized,
// which is a bit too late -- we should ideally notice this already here, when the memory
Expand Down
98 changes: 98 additions & 0 deletions src/tools/miri/src/concurrency/data_race.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ use std::{
};

use rustc_ast::Mutability;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::FxHashSet;
use rustc_index::{Idx, IndexVec};
use rustc_middle::{mir, ty::Ty};
Expand Down Expand Up @@ -1121,6 +1122,103 @@ impl VClockAlloc {
}
}

/// Vector clock state for a stack frame (tracking the local variables
/// that do not have an allocation yet).
#[derive(Debug, Default)]
pub struct FrameState {
local_clocks: RefCell<FxHashMap<mir::Local, LocalClocks>>,
}

/// Stripped-down version of [`MemoryCellClocks`] for the clocks we need to keep track
/// of in a local that does not yet have addressable memory -- and hence can only
/// be accessed from the thread its stack frame belongs to, and cannot be access atomically.
#[derive(Debug)]
struct LocalClocks {
write: VTimestamp,
write_type: NaWriteType,
read: VTimestamp,
}

impl Default for LocalClocks {
fn default() -> Self {
Self { write: VTimestamp::ZERO, write_type: NaWriteType::Allocate, read: VTimestamp::ZERO }
}
}

impl FrameState {
pub fn local_write(&self, local: mir::Local, storage_live: bool, machine: &MiriMachine<'_>) {
let current_span = machine.current_span();
let global = machine.data_race.as_ref().unwrap();
if global.race_detecting() {
RalfJung marked this conversation as resolved.
Show resolved Hide resolved
let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
// This should do the same things as `MemoryCellClocks::write_race_detect`.
if !current_span.is_dummy() {
thread_clocks.clock.index_mut(index).span = current_span;
}
let mut clocks = self.local_clocks.borrow_mut();
if storage_live {
let new_clocks = LocalClocks {
write: thread_clocks.clock[index],
write_type: NaWriteType::Allocate,
read: VTimestamp::ZERO,
};
// There might already be an entry in the map for this, if the local was previously
// live already.
clocks.insert(local, new_clocks);
} else {
// This can fail to exist if `race_detecting` was false when the allocation
// occurred, in which case we can backdate this to the beginning of time.
let clocks = clocks.entry(local).or_insert_with(Default::default);
clocks.write = thread_clocks.clock[index];
clocks.write_type = NaWriteType::Write;
}
}
}

pub fn local_read(&self, local: mir::Local, machine: &MiriMachine<'_>) {
let current_span = machine.current_span();
let global = machine.data_race.as_ref().unwrap();
if global.race_detecting() {
let (index, mut thread_clocks) = global.active_thread_state_mut(&machine.threads);
// This should do the same things as `MemoryCellClocks::read_race_detect`.
if !current_span.is_dummy() {
thread_clocks.clock.index_mut(index).span = current_span;
}
thread_clocks.clock.index_mut(index).set_read_type(NaReadType::Read);
// This can fail to exist if `race_detecting` was false when the allocation
// occurred, in which case we can backdate this to the beginning of time.
let mut clocks = self.local_clocks.borrow_mut();
let clocks = clocks.entry(local).or_insert_with(Default::default);
clocks.read = thread_clocks.clock[index];
}
}

pub fn local_moved_to_memory(
&self,
local: mir::Local,
alloc: &mut VClockAlloc,
machine: &MiriMachine<'_>,
) {
let global = machine.data_race.as_ref().unwrap();
if global.race_detecting() {
let (index, _thread_clocks) = global.active_thread_state_mut(&machine.threads);
// Get the time the last write actually happened. This can fail to exist if
// `race_detecting` was false when the write occurred, in that case we can backdate this
// to the beginning of time.
let local_clocks = self.local_clocks.borrow_mut().remove(&local).unwrap_or_default();
for (_mem_clocks_range, mem_clocks) in alloc.alloc_ranges.get_mut().iter_mut_all() {
// The initialization write for this already happened, just at the wrong timestamp.
// Check that the thread index matches what we expect.
assert_eq!(mem_clocks.write.0, index);
// Convert the local's clocks into memory clocks.
mem_clocks.write = (index, local_clocks.write);
mem_clocks.write_type = local_clocks.write_type;
mem_clocks.read = VClock::new_with_index(index, local_clocks.read);
}
}
}
}

impl<'tcx> EvalContextPrivExt<'tcx> for MiriInterpCx<'tcx> {}
trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
/// Temporarily allow data-races to occur. This should only be used in
Expand Down
4 changes: 3 additions & 1 deletion src/tools/miri/src/concurrency/thread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,9 @@ impl<'tcx> ThreadManager<'tcx> {
}

/// Mutably borrow the stack of the active thread.
fn active_thread_stack_mut(&mut self) -> &mut Vec<Frame<'tcx, Provenance, FrameExtra<'tcx>>> {
pub fn active_thread_stack_mut(
&mut self,
) -> &mut Vec<Frame<'tcx, Provenance, FrameExtra<'tcx>>> {
&mut self.threads[self.active_thread].stack
}
pub fn all_stacks(
Expand Down
6 changes: 6 additions & 0 deletions src/tools/miri/src/concurrency/vector_clock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,13 +130,19 @@ impl Ord for VTimestamp {
/// also this means that there is only one unique valid length
/// for each set of vector clock values and hence the PartialEq
/// and Eq derivations are correct.
///
/// This means we cannot represent a clock where the last entry is a timestamp-0 read that occurs
/// because of a retag. That's fine, all it does is risk wrong diagnostics in a extreme corner case.
#[derive(PartialEq, Eq, Default, Debug)]
pub struct VClock(SmallVec<[VTimestamp; SMALL_VECTOR]>);

impl VClock {
/// Create a new vector-clock containing all zeros except
/// for a value at the given index
pub(super) fn new_with_index(index: VectorIdx, timestamp: VTimestamp) -> VClock {
if timestamp.time() == 0 {
return VClock::default();
}
let len = index.index() + 1;
let mut vec = smallvec::smallvec![VTimestamp::ZERO; len];
vec[index.index()] = timestamp;
Expand Down
55 changes: 50 additions & 5 deletions src/tools/miri/src/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,24 +81,42 @@ pub struct FrameExtra<'tcx> {
/// an additional bit of "salt" into the cache key. This salt is fixed per-frame
/// so that within a call, a const will have a stable address.
salt: usize,

/// Data race detector per-frame data.
pub data_race: Option<data_race::FrameState>,
}

impl<'tcx> std::fmt::Debug for FrameExtra<'tcx> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// Omitting `timing`, it does not support `Debug`.
let FrameExtra { borrow_tracker, catch_unwind, timing: _, is_user_relevant: _, salt: _ } =
self;
let FrameExtra {
borrow_tracker,
catch_unwind,
timing: _,
is_user_relevant,
salt,
data_race,
} = self;
f.debug_struct("FrameData")
.field("borrow_tracker", borrow_tracker)
.field("catch_unwind", catch_unwind)
.field("is_user_relevant", is_user_relevant)
.field("salt", salt)
.field("data_race", data_race)
.finish()
}
}

impl VisitProvenance for FrameExtra<'_> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let FrameExtra { catch_unwind, borrow_tracker, timing: _, is_user_relevant: _, salt: _ } =
self;
let FrameExtra {
catch_unwind,
borrow_tracker,
timing: _,
is_user_relevant: _,
salt: _,
data_race: _,
} = self;

catch_unwind.visit_provenance(visit);
borrow_tracker.visit_provenance(visit);
Expand Down Expand Up @@ -1446,6 +1464,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
timing,
is_user_relevant: ecx.machine.is_user_relevant(&frame),
salt: ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL,
data_race: ecx.machine.data_race.as_ref().map(|_| data_race::FrameState::default()),
};

Ok(frame.with_extra(extra))
Expand Down Expand Up @@ -1551,17 +1570,43 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
res
}

fn after_local_allocated(
fn after_local_read(ecx: &InterpCx<'tcx, Self>, local: mir::Local) -> InterpResult<'tcx> {
if let Some(data_race) = &ecx.frame().extra.data_race {
data_race.local_read(local, &ecx.machine);
}
Ok(())
}

fn after_local_write(
ecx: &mut InterpCx<'tcx, Self>,
local: mir::Local,
storage_live: bool,
) -> InterpResult<'tcx> {
if let Some(data_race) = &ecx.frame().extra.data_race {
data_race.local_write(local, storage_live, &ecx.machine);
}
Ok(())
}

fn after_local_moved_to_memory(
ecx: &mut InterpCx<'tcx, Self>,
local: mir::Local,
mplace: &MPlaceTy<'tcx>,
) -> InterpResult<'tcx> {
let Some(Provenance::Concrete { alloc_id, .. }) = mplace.ptr().provenance else {
panic!("after_local_allocated should only be called on fresh allocations");
};
// Record the span where this was allocated: the declaration of the local.
let local_decl = &ecx.frame().body().local_decls[local];
let span = local_decl.source_info.span;
ecx.machine.allocation_spans.borrow_mut().insert(alloc_id, (span, None));
// The data race system has to fix the clocks used for this write.
let (alloc_info, machine) = ecx.get_alloc_extra_mut(alloc_id)?;
if let Some(data_race) =
&machine.threads.active_thread_stack().last().unwrap().extra.data_race
{
data_race.local_moved_to_memory(local, alloc_info.data_race.as_mut().unwrap(), machine);
}
Ok(())
}

Expand Down
Loading
Loading