give some more help for the unusual data races #3145

Merged · 2 commits · Nov 4, 2023
162 changes: 109 additions & 53 deletions src/concurrency/data_race.rs
@@ -41,7 +41,6 @@
//! on the data-race detection code.

use std::{
borrow::Cow,
cell::{Cell, Ref, RefCell, RefMut},
fmt::Debug,
mem,
@@ -199,7 +198,7 @@ struct AtomicMemoryCellClocks {
/// are all treated as writes for the purpose
/// of the data-race detector.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum WriteType {
enum NaWriteType {
/// Allocate memory.
Allocate,

@@ -212,12 +211,48 @@ enum WriteType {
/// (Same for `Allocate` above.)
Deallocate,
}
impl WriteType {
fn get_descriptor(self) -> &'static str {

impl NaWriteType {
fn description(self) -> &'static str {
match self {
WriteType::Allocate => "Allocate",
WriteType::Write => "Write",
WriteType::Deallocate => "Deallocate",
NaWriteType::Allocate => "creating a new allocation",
NaWriteType::Write => "non-atomic write",
NaWriteType::Deallocate => "deallocation",
}
}
}

#[derive(Copy, Clone, PartialEq, Eq, Debug)]
enum AccessType {
NaRead,
NaWrite(NaWriteType),
AtomicLoad,
AtomicStore,
AtomicRmw,
}

impl AccessType {
fn description(self) -> &'static str {
match self {
AccessType::NaRead => "non-atomic read",
AccessType::NaWrite(w) => w.description(),
AccessType::AtomicLoad => "atomic load",
AccessType::AtomicStore => "atomic store",
AccessType::AtomicRmw => "atomic read-modify-write",
}
}

fn is_atomic(self) -> bool {
match self {
AccessType::AtomicLoad | AccessType::AtomicStore | AccessType::AtomicRmw => true,
AccessType::NaRead | AccessType::NaWrite(_) => false,
}
}

fn is_read(self) -> bool {
match self {
AccessType::AtomicLoad | AccessType::NaRead => true,
AccessType::NaWrite(_) | AccessType::AtomicStore | AccessType::AtomicRmw => false,
}
}
}
@@ -234,7 +269,7 @@ struct MemoryCellClocks {
/// The type of operation that the write index represents,
/// either newly allocated memory, a non-atomic write or
/// a deallocation of memory.
write_type: WriteType,
write_type: NaWriteType,

/// The vector-clock of all non-atomic reads that happened since the last non-atomic write
/// (i.e., we join together the "singleton" clocks corresponding to each read). It is reset to
@@ -265,7 +300,7 @@ impl MemoryCellClocks {
MemoryCellClocks {
read: VClock::default(),
write: (alloc_index, alloc),
write_type: WriteType::Allocate,
write_type: NaWriteType::Allocate,
atomic_ops: None,
}
}
@@ -488,7 +523,7 @@ impl MemoryCellClocks {
&mut self,
thread_clocks: &mut ThreadClockSet,
index: VectorIdx,
write_type: WriteType,
write_type: NaWriteType,
current_span: Span,
) -> Result<(), DataRace> {
log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, thread_clocks);
@@ -838,48 +873,45 @@ impl VClockAlloc {
global: &GlobalState,
thread_mgr: &ThreadManager<'_, '_>,
mem_clocks: &MemoryCellClocks,
action: &str,
is_atomic: bool,
access: AccessType,
access_size: Size,
ptr_dbg: Pointer<AllocId>,
) -> InterpResult<'tcx> {
let (current_index, current_clocks) = global.current_thread_state(thread_mgr);
let mut action = Cow::Borrowed(action);
let mut involves_non_atomic = true;
let mut other_size = None; // if `Some`, this was a size-mismatch race
let write_clock;
let (other_action, other_thread, other_clock) =
let (other_access, other_thread, other_clock) =
// First check the atomic-nonatomic cases. If it looks like multiple
// cases apply, this one should take precedence, else it might look like
// we are reporting races between two non-atomic reads.
if !is_atomic &&
if !access.is_atomic() &&
let Some(atomic) = mem_clocks.atomic() &&
let Some(idx) = Self::find_gt_index(&atomic.write_vector, &current_clocks.clock)
{
(format!("Atomic Store"), idx, &atomic.write_vector)
} else if !is_atomic &&
(AccessType::AtomicStore, idx, &atomic.write_vector)
} else if !access.is_atomic() &&
let Some(atomic) = mem_clocks.atomic() &&
let Some(idx) = Self::find_gt_index(&atomic.read_vector, &current_clocks.clock)
{
(format!("Atomic Load"), idx, &atomic.read_vector)
(AccessType::AtomicLoad, idx, &atomic.read_vector)
// Then check races with non-atomic writes/reads.
} else if mem_clocks.write.1 > current_clocks.clock[mem_clocks.write.0] {
write_clock = mem_clocks.write();
(mem_clocks.write_type.get_descriptor().to_owned(), mem_clocks.write.0, &write_clock)
(AccessType::NaWrite(mem_clocks.write_type), mem_clocks.write.0, &write_clock)
} else if let Some(idx) = Self::find_gt_index(&mem_clocks.read, &current_clocks.clock) {
(format!("Read"), idx, &mem_clocks.read)
(AccessType::NaRead, idx, &mem_clocks.read)
// Finally, mixed-size races.
} else if is_atomic && let Some(atomic) = mem_clocks.atomic() && atomic.size != access_size {
} else if access.is_atomic() && let Some(atomic) = mem_clocks.atomic() && atomic.size != access_size {
// This is only a race if we are not synchronized with all atomic accesses, so find
// the one we are not synchronized with.
involves_non_atomic = false;
action = format!("{}-byte (different-size) {action}", access_size.bytes()).into();
other_size = Some(atomic.size);
if let Some(idx) = Self::find_gt_index(&atomic.write_vector, &current_clocks.clock)
{
(format!("{}-byte Atomic Store", atomic.size.bytes()), idx, &atomic.write_vector)
(AccessType::AtomicStore, idx, &atomic.write_vector)
} else if let Some(idx) =
Self::find_gt_index(&atomic.read_vector, &current_clocks.clock)
{
(format!("{}-byte Atomic Load", atomic.size.bytes()), idx, &atomic.read_vector)
(AccessType::AtomicLoad, idx, &atomic.read_vector)
} else {
unreachable!(
"Failed to report data-race for mixed-size access: no race found"
@@ -892,18 +924,39 @@ impl VClockAlloc {
// Load elaborated thread information about the racing thread actions.
let current_thread_info = global.print_thread_metadata(thread_mgr, current_index);
let other_thread_info = global.print_thread_metadata(thread_mgr, other_thread);
let involves_non_atomic = !access.is_atomic() || !other_access.is_atomic();

// Throw the data-race detection.
let extra = if other_size.is_some() {
assert!(!involves_non_atomic);
Some("overlapping unsynchronized atomic accesses must use the same access size")
} else if access.is_read() && other_access.is_read() {
assert!(involves_non_atomic);
Some(
"overlapping atomic and non-atomic accesses must be synchronized, even if both are read-only",
Review thread on this line:

RalfJung (Member, Author):
@saethlin I have been deliberating how to best express this... is "must be synchronized" clear enough? Would "must be ordered" be better? Any other ideas?

saethlin (Member):
Just to be sure, is this an example of the UB you are talking about?

error: Undefined Behavior: Data race detected between (1) Atomic Load on thread `tokio-runtime-w` and (2) Read on thread `tokio-runtime-w` at alloc31996+0x20. (2) just happened here
   --> /home/ben/tokio/tokio/src/loom/std/atomic_u32.rs:26:9
    |
26  |         core::ptr::read(self.inner.get() as *const u32)
    |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Data race detected between (1) Atomic Load on thread `tokio-runtime-w` and (2) Read on thread `tokio-runtime-w` at alloc31996+0x20. (2) just happened here
    |
help: and (1) occurred earlier here
   --> /home/ben/tokio/tokio/src/runtime/scheduler/multi_thread/queue.rs:455:28
    |
455 |             let src_tail = self.0.tail.load(Acquire);
    |                            ^^^^^^^^^^^^^^^^^^^^^^^^^

RalfJung (Member, Author):
Yes.
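A minimal standalone sketch (not from tokio or this PR) of that access pattern: one thread does an atomic load while another does a non-atomic read of the same location, with no synchronization between them. `AtomicU32::as_ptr` is just one convenient way to obtain the non-atomic view; the thread names are illustrative.

```rust
// Illustrative reproducer, not part of this PR: both accesses are reads,
// but one is atomic and one is non-atomic, and nothing orders them.
use std::sync::atomic::{AtomicU32, Ordering};
use std::thread;

static X: AtomicU32 = AtomicU32::new(0);

fn main() {
    let atomic_reader = thread::spawn(|| {
        X.load(Ordering::Acquire) // (1) atomic load
    });
    let plain_reader = thread::spawn(|| {
        unsafe { X.as_ptr().read() } // (2) non-atomic read of the same bytes
    });
    atomic_reader.join().unwrap();
    plain_reader.join().unwrap();
}
```

With this PR, the two operations would be described as an "atomic load" and a "non-atomic read", together with the new help text that overlapping atomic and non-atomic accesses must be synchronized even if both are read-only.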

saethlin (Member):
I think I prefer "ordered"? At least the way I'd hand-wave the difference between those terms in English is to say that atomic operations provide orderings, then synchronization tools like locks can be built on those. Synchronization sounds like a stronger term to me 🤷

But my biggest question/concern with the diagnostic here is that this being UB will probably be surprising, so I think it would be best if we could preempt people asking questions about why. Is there a short version explanation? Or maybe we could link to rust-lang/unsafe-code-guidelines#345? The Twitter link in the issue is already broken, so the story is already a little confusing. My best reading of that issue is that this is UB because we mostly punt to C++ for our atomic semantics, and C++ seems to make this UB, though it's not clear why, and x86 really wants mixed-size atomics to be UB.

RalfJung (Member, Author):
> Is there a short version explanation?

"There's no way to write code that does this in C++ without causing UB, and for now we don't want to invent our own memory model."

I am not sure if C++ has a very good reason for making this UB... x86 says it's disallowed but also Linux and Wine do it so it's not like they will be able to break this.
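For readers unfamiliar with the term, a rough Rust sketch (not from the PR, and deliberately contrived) of a mixed-size atomic access: the same memory accessed atomically at two different widths, which the patch reports through the `other_size` path with the "must use the same access size" help.

```rust
// Illustrative only: two threads perform unsynchronized atomic accesses to
// overlapping memory with different access sizes (4 bytes vs. 2 bytes).
use std::sync::atomic::{AtomicU16, AtomicU32, Ordering};
use std::thread;

static X: AtomicU32 = AtomicU32::new(0);

fn main() {
    let whole = thread::spawn(|| {
        X.load(Ordering::Relaxed) // 4-byte atomic load
    });
    let half = thread::spawn(|| {
        // Reinterpret the first two bytes of X as an AtomicU16: same
        // location, different access size, no synchronization.
        let h: &AtomicU16 = unsafe { &*(X.as_ptr() as *const AtomicU16) };
        h.load(Ordering::Relaxed) // 2-byte atomic load
    });
    whole.join().unwrap();
    half.join().unwrap();
}
```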

saethlin (Member):
> AFAIK describing mixed-size accesses in these C++-style models is just very hard / hasn't been figured out yet

That's definitely how it seems to me. It would be unfortunate for us to say that atomic and non-atomic reads can produce a data race, especially if it's known to be permissible in formal models.

We've been not reporting this scenario as UB for a long time, so it would be kind of silly to start defensively reporting UB when we're sure that this will be made well-defined in the future.

RalfJung (Member, Author), Oct 28, 2023:
> We've been not reporting this scenario as UB for a long time,

For a long time we didn't support threads at all, and once we did, we didn't report any data races for a long time either. The fact that we allowed racing atomic and non-atomic accesses was an oversight; I didn't realize that this is not possible in C++.

We have documented for a looooong time that we are using the C++ memory model. Until recently we didn't have any APIs that would let people cause racing atomic and non-atomic accesses (without brute-force operations like transmute). rust-lang/rust#115719 stabilized AtomicX::from_ptr which made this possible for the first time, and as part of stabilization we also clarified the docs to say explicitly that mixing atomic and non-atomic accesses is not allowed. Miri should detect such bugs.
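To make the `AtomicX::from_ptr` point concrete, an illustrative single-threaded sketch (not from the PR): the API hands out an atomic view of memory that remains reachable through the original raw pointer, so a second thread could perform exactly the non-atomic access that the clarified docs forbid.

```rust
use std::sync::atomic::{AtomicU32, Ordering};

fn main() {
    let mut data: u32 = 0;
    let ptr: *mut u32 = &mut data;
    // Atomic view of `data`, obtained via the API stabilized in
    // rust-lang/rust#115719.
    let atomic: &AtomicU32 = unsafe { AtomicU32::from_ptr(ptr) };
    atomic.store(1, Ordering::Release);
    // A second thread doing `unsafe { ptr.read() }` concurrently with the
    // store above would be the atomic/non-atomic mixing that the clarified
    // docs rule out and that Miri now reports.
    assert_eq!(atomic.load(Ordering::Acquire), 1);
}
```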

saethlin (Member):
Are you suggesting we link to the docs? People tend to behave as if the docs are stable: they read them perhaps a few times when they are learning APIs and then just use them. Unless we do something to prompt them, I do not think it is reasonable to expect an experienced user to notice that the docs have changed.

I'm not opposing reporting UB here, I just expect this one to be very odd to explain to people. And the more we can head that off when they see the UB report, the better.

RalfJung (Member, Author):
Sure, I can add links to the docs.

RalfJung (Member, Author):
But that is blocked on rust-lang/rust#116762, which puts the relevant docs in a more central place.

)
} else {
None
};
Err(err_machine_stop!(TerminationInfo::DataRace {
involves_non_atomic,
extra,
ptr: ptr_dbg,
op1: RacingOp {
action: other_action.to_string(),
action: if let Some(other_size) = other_size {
format!("{}-byte {}", other_size.bytes(), other_access.description())
} else {
other_access.description().to_owned()
},
thread_info: other_thread_info,
span: other_clock.as_slice()[other_thread.index()].span_data(),
},
op2: RacingOp {
action: action.to_string(),
action: if other_size.is_some() {
format!("{}-byte {}", access_size.bytes(), access.description())
} else {
access.description().to_owned()
},
thread_info: current_thread_info,
span: current_clocks.clock.as_slice()[current_index.index()].span_data(),
},
@@ -938,8 +991,7 @@ impl VClockAlloc {
global,
&machine.threads,
mem_clocks,
"Read",
/* is_atomic */ false,
AccessType::NaRead,
access_range.size,
Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
);
@@ -956,7 +1008,7 @@ impl VClockAlloc {
&mut self,
alloc_id: AllocId,
access_range: AllocRange,
write_type: WriteType,
write_type: NaWriteType,
machine: &mut MiriMachine<'_, '_>,
) -> InterpResult<'tcx> {
let current_span = machine.current_span();
@@ -978,8 +1030,7 @@ impl VClockAlloc {
global,
&machine.threads,
mem_clocks,
write_type.get_descriptor(),
/* is_atomic */ false,
AccessType::NaWrite(write_type),
access_range.size,
Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
);
@@ -1001,7 +1052,7 @@ impl VClockAlloc {
range: AllocRange,
machine: &mut MiriMachine<'_, '_>,
) -> InterpResult<'tcx> {
self.unique_access(alloc_id, range, WriteType::Write, machine)
self.unique_access(alloc_id, range, NaWriteType::Write, machine)
}

/// Detect data-races for an unsynchronized deallocate operation, will not perform
@@ -1014,7 +1065,7 @@ impl VClockAlloc {
range: AllocRange,
machine: &mut MiriMachine<'_, '_>,
) -> InterpResult<'tcx> {
self.unique_access(alloc_id, range, WriteType::Deallocate, machine)
self.unique_access(alloc_id, range, NaWriteType::Deallocate, machine)
}
}

@@ -1104,7 +1155,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
this.validate_atomic_op(
place,
atomic,
"Atomic Load",
AccessType::AtomicLoad,
move |memory, clocks, index, atomic| {
if atomic == AtomicReadOrd::Relaxed {
memory.load_relaxed(&mut *clocks, index, place.layout.size)
@@ -1126,7 +1177,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
this.validate_atomic_op(
place,
atomic,
"Atomic Store",
AccessType::AtomicStore,
move |memory, clocks, index, atomic| {
if atomic == AtomicWriteOrd::Relaxed {
memory.store_relaxed(clocks, index, place.layout.size)
@@ -1148,26 +1199,31 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
let acquire = matches!(atomic, Acquire | AcqRel | SeqCst);
let release = matches!(atomic, Release | AcqRel | SeqCst);
let this = self.eval_context_mut();
this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| {
if acquire {
memory.load_acquire(clocks, index, place.layout.size)?;
} else {
memory.load_relaxed(clocks, index, place.layout.size)?;
}
if release {
memory.rmw_release(clocks, index, place.layout.size)
} else {
memory.rmw_relaxed(clocks, index, place.layout.size)
}
})
this.validate_atomic_op(
place,
atomic,
AccessType::AtomicRmw,
move |memory, clocks, index, _| {
if acquire {
memory.load_acquire(clocks, index, place.layout.size)?;
} else {
memory.load_relaxed(clocks, index, place.layout.size)?;
}
if release {
memory.rmw_release(clocks, index, place.layout.size)
} else {
memory.rmw_relaxed(clocks, index, place.layout.size)
}
},
)
}

/// Generic atomic operation implementation
fn validate_atomic_op<A: Debug + Copy>(
&self,
place: &MPlaceTy<'tcx, Provenance>,
atomic: A,
description: &str,
access: AccessType,
mut op: impl FnMut(
&mut MemoryCellClocks,
&mut ThreadClockSet,
@@ -1176,6 +1232,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
) -> Result<(), DataRace>,
) -> InterpResult<'tcx> {
let this = self.eval_context_ref();
assert!(access.is_atomic());
if let Some(data_race) = &this.machine.data_race {
if data_race.race_detecting() {
let size = place.layout.size;
@@ -1185,7 +1242,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap();
log::trace!(
"Atomic op({}) with ordering {:?} on {:?} (size={})",
description,
access.description(),
&atomic,
place.ptr(),
size.bytes()
@@ -1207,8 +1264,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
data_race,
&this.machine.threads,
mem_clocks,
description,
/* is_atomic */ true,
access,
place.layout.size,
Pointer::new(
alloc_id,
19 changes: 12 additions & 7 deletions src/diagnostics.rs
@@ -47,6 +47,7 @@ pub enum TerminationInfo {
ptr: Pointer,
op1: RacingOp,
op2: RacingOp,
extra: Option<&'static str>,
},
}

@@ -75,7 +76,7 @@ impl fmt::Display for TerminationInfo {
write!(f, "multiple definitions of symbol `{link_name}`"),
SymbolShimClashing { link_name, .. } =>
write!(f, "found `{link_name}` symbol definition that clashes with a built-in shim",),
DataRace { involves_non_atomic, ptr, op1, op2 } =>
DataRace { involves_non_atomic, ptr, op1, op2, .. } =>
write!(
f,
"{} detected between (1) {} on {} and (2) {} on {} at {ptr:?}. (2) just happened here",
@@ -266,12 +267,16 @@ pub fn report_error<'tcx, 'mir>(
vec![(Some(*span), format!("the `{link_name}` symbol is defined here"))],
Int2PtrWithStrictProvenance =>
vec![(None, format!("use Strict Provenance APIs (https://doc.rust-lang.org/nightly/std/ptr/index.html#strict-provenance, https://crates.io/crates/sptr) instead"))],
DataRace { op1, .. } =>
vec![
(Some(op1.span), format!("and (1) occurred earlier here")),
(None, format!("this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior")),
(None, format!("see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information")),
],
DataRace { op1, extra, .. } => {
let mut helps = vec![(Some(op1.span), format!("and (1) occurred earlier here"))];
if let Some(extra) = extra {
helps.push((None, format!("{extra}")))
}
helps.push((None, format!("this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior")));
helps.push((None, format!("see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information")));
helps
}
,
_ => vec![],
};
(title, helps)
2 changes: 1 addition & 1 deletion tests/fail/both_borrows/retag_data_race_write.rs
@@ -17,7 +17,7 @@ fn thread_1(p: SendPtr) {
fn thread_2(p: SendPtr) {
let p = p.0;
unsafe {
*p = 5; //~ ERROR: /Data race detected between \(1\) (Read|Write) on thread `<unnamed>` and \(2\) Write on thread `<unnamed>`/
*p = 5; //~ ERROR: /Data race detected between \(1\) non-atomic (read|write) on thread `<unnamed>` and \(2\) non-atomic write on thread `<unnamed>`/
}
}

4 changes: 2 additions & 2 deletions tests/fail/both_borrows/retag_data_race_write.stack.stderr
@@ -1,8 +1,8 @@
error: Undefined Behavior: Data race detected between (1) Write on thread `<unnamed>` and (2) Write on thread `<unnamed>` at ALLOC. (2) just happened here
error: Undefined Behavior: Data race detected between (1) non-atomic write on thread `<unnamed>` and (2) non-atomic write on thread `<unnamed>` at ALLOC. (2) just happened here
--> $DIR/retag_data_race_write.rs:LL:CC
|
LL | *p = 5;
| ^^^^^^ Data race detected between (1) Write on thread `<unnamed>` and (2) Write on thread `<unnamed>` at ALLOC. (2) just happened here
| ^^^^^^ Data race detected between (1) non-atomic write on thread `<unnamed>` and (2) non-atomic write on thread `<unnamed>` at ALLOC. (2) just happened here
|
help: and (1) occurred earlier here
--> $DIR/retag_data_race_write.rs:LL:CC