Address reuse improvements and fixes #3475

Merged (3 commits) on Apr 19, 2024
when reusing an address, most of the time only reuse from the current thread
RalfJung committed Apr 18, 2024
commit 2155a302a78ee0fc95aecf093ac35739d0cd562b
9 changes: 7 additions & 2 deletions README.md
@@ -298,8 +298,13 @@ environment variable. We first document the most relevant and most commonly used
* `-Zmiri-address-reuse-rate=<rate>` changes the probability that a freed *non-stack* allocation
will be added to the pool for address reuse, and the probability that a new *non-stack* allocation
will be taken from the pool. Stack allocations never get added to or taken from the pool. The
-default is `0.5`. Note that a very high reuse rate can mask concurrency bugs as address
-reuse induces synchronization between threads.
+default is `0.5`.
+* `-Zmiri-address-reuse-cross-thread-rate=<rate>` changes the probability that an allocation which
+attempts to reuse a previously freed block of memory will also consider blocks freed by *other
+threads*. The default is `0.1`, which means by default, in 90% of the cases where an address reuse
+attempt is made, only addresses from the same thread will be considered. Reusing an address from
+another thread induces synchronization between those threads, which can mask data races and weak
+memory bugs.
* `-Zmiri-compare-exchange-weak-failure-rate=<rate>` changes the failure rate of
`compare_exchange_weak` operations. The default is `0.8` (so 4 out of 5 weak ops will fail).
You can change it to any value between `0.0` and `1.0`, where `1.0` means it
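To see how the two rates compose: with the defaults, a new non-stack allocation attempts reuse with probability 0.5, and only 10% of those attempts consider other threads' freed addresses, so roughly 0.5 × 0.1 = 5% of non-stack allocations can pick up a cross-thread address and the synchronization edge that comes with it. Setting `-Zmiri-address-reuse-cross-thread-rate=0`, as the tests below do, rules out that synchronization entirely.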
17 changes: 11 additions & 6 deletions src/alloc_addresses/mod.rs
@@ -78,7 +78,7 @@ impl GlobalStateInner {
GlobalStateInner {
int_to_ptr_map: Vec::default(),
base_addr: FxHashMap::default(),
-reuse: ReusePool::new(config.address_reuse_rate),
+reuse: ReusePool::new(config),
exposed: FxHashSet::default(),
next_base_addr: stack_addr,
provenance_mode: config.provenance_mode,
@@ -164,9 +164,13 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
assert!(!matches!(kind, AllocKind::Dead));

// This allocation does not have a base address yet, pick or reuse one.
-let base_addr = if let Some((reuse_addr, clock)) =
-global_state.reuse.take_addr(&mut *rng, size, align, memory_kind)
-{
+let base_addr = if let Some((reuse_addr, clock)) = global_state.reuse.take_addr(
+&mut *rng,
+size,
+align,
+memory_kind,
+ecx.get_active_thread(),
+) {
if let Some(data_race) = &ecx.machine.data_race {
data_race.validate_lock_acquire(&clock, ecx.get_active_thread());
}
@@ -363,12 +367,13 @@ impl<'mir, 'tcx> MiriMachine<'mir, 'tcx> {
// `alloc_id_from_addr` any more.
global_state.exposed.remove(&dead_id);
// Also remember this address for future reuse.
-global_state.reuse.add_addr(rng, addr, size, align, kind, || {
+let thread = self.threads.get_active_thread_id();
+global_state.reuse.add_addr(rng, addr, size, align, kind, thread, || {
let mut clock = concurrency::VClock::default();
if let Some(data_race) = &self.data_race {
data_race.validate_lock_release(
&mut clock,
-self.threads.get_active_thread_id(),
+thread,
self.threads.active_thread_ref().current_span(),
);
}
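This is the release/acquire pairing that makes cross-thread reuse a synchronization point: the freeing thread releases its clock into the pool entry (`validate_lock_release`), and the thread that later takes the address acquires it (`validate_lock_acquire`). A minimal sketch of the pattern with a toy vector clock (Miri's real `VClock` and data-race machinery are more involved; the names here are illustrative only):

```rust
use std::collections::HashMap;

type ThreadId = u32;

/// Toy vector clock: one counter per thread. Miri's real `VClock` is more involved.
#[derive(Clone, Debug, Default, PartialEq)]
struct Clock(HashMap<ThreadId, u64>);

impl Clock {
    /// "Acquire" a donated clock: everything the donor did before the
    /// release now happens-before the acquiring thread's next steps.
    fn join(&mut self, other: &Clock) {
        for (&t, &c) in &other.0 {
            let e = self.0.entry(t).or_default();
            *e = (*e).max(c);
        }
    }
}

fn main() {
    // Thread 1 frees an address: it "releases" a snapshot of its clock
    // into the pool entry, alongside the address and its thread ID.
    let donated = Clock(HashMap::from([(1, 7)]));
    let pool_entry: (u64, ThreadId, Clock) = (0x1000, 1, donated);

    // Thread 2 reuses the address: it must "acquire" the donated clock,
    // which is why cross-thread reuse can mask data races.
    let mut thread2 = Clock(HashMap::from([(2, 3)]));
    thread2.join(&pool_entry.2);
    assert_eq!(thread2, Clock(HashMap::from([(1, 7), (2, 3)])));
}
```

The new `-Zmiri-address-reuse-cross-thread-rate` flag controls how often that acquire edge can appear at all.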
49 changes: 35 additions & 14 deletions src/alloc_addresses/reuse_pool.rs
@@ -4,7 +4,7 @@ use rand::Rng;

use rustc_target::abi::{Align, Size};

-use crate::{concurrency::VClock, MemoryKind};
+use crate::{concurrency::VClock, MemoryKind, MiriConfig, ThreadId};

const MAX_POOL_SIZE: usize = 64;

@@ -15,23 +15,28 @@ const MAX_POOL_SIZE: usize = 64;
#[derive(Debug)]
pub struct ReusePool {
address_reuse_rate: f64,
+address_reuse_cross_thread_rate: f64,
/// The i-th element in `pool` stores allocations of alignment `2^i`. We store these reusable
-/// allocations as address-size pairs, the list must be sorted by the size.
+/// allocations as address-size pairs, the list must be sorted by the size and then the thread ID.
///
/// Each of these maps has at most MAX_POOL_SIZE elements, and since alignment is limited to
/// less than 64 different possible values, that bounds the overall size of the pool.
///
-/// We also store the clock from the thread that donated this pool element,
+/// We also store the ID and the data-race clock of the thread that donated this pool element,
/// to ensure synchronization with the thread that picks up this address.
-pool: Vec<Vec<(u64, Size, VClock)>>,
+pool: Vec<Vec<(u64, Size, ThreadId, VClock)>>,
}

impl ReusePool {
-pub fn new(address_reuse_rate: f64) -> Self {
-ReusePool { address_reuse_rate, pool: vec![] }
+pub fn new(config: &MiriConfig) -> Self {
+ReusePool {
+address_reuse_rate: config.address_reuse_rate,
+address_reuse_cross_thread_rate: config.address_reuse_cross_thread_rate,
+pool: vec![],
+}
}

-fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size, VClock)> {
+fn subpool(&mut self, align: Align) -> &mut Vec<(u64, Size, ThreadId, VClock)> {
let pool_idx: usize = align.bytes().trailing_zeros().try_into().unwrap();
if self.pool.len() <= pool_idx {
self.pool.resize(pool_idx + 1, Vec::new());
@@ -46,6 +51,7 @@ impl ReusePool {
size: Size,
align: Align,
kind: MemoryKind,
+thread: ThreadId,
clock: impl FnOnce() -> VClock,
) {
// Let's see if we even want to remember this address.
@@ -55,18 +61,21 @@ impl ReusePool {
if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
return;
}
+let clock = clock();
// Determine the pool to add this to, and where in the pool to put it.
let subpool = self.subpool(align);
-let pos = subpool.partition_point(|(_addr, other_size, _)| *other_size < size);
+let pos = subpool.partition_point(|(_addr, other_size, other_thread, _)| {
+(*other_size, *other_thread) < (size, thread)
+});
// Make sure the pool does not grow too big.
if subpool.len() >= MAX_POOL_SIZE {
// Pool full. Replace existing element, or last one if this would be even bigger.
let clamped_pos = pos.min(subpool.len() - 1);
-subpool[clamped_pos] = (addr, size, clock());
+subpool[clamped_pos] = (addr, size, thread, clock);
return;
}
// Add address to pool, at the right position.
-subpool.insert(pos, (addr, size, clock()));
+subpool.insert(pos, (addr, size, thread, clock));
}

pub fn take_addr(
@@ -75,21 +84,32 @@ impl ReusePool {
size: Size,
align: Align,
kind: MemoryKind,
+thread: ThreadId,
) -> Option<(u64, VClock)> {
// Determine whether we'll even attempt a reuse. As above, we don't do reuse for stack addresses.
if kind == MemoryKind::Stack || !rng.gen_bool(self.address_reuse_rate) {
return None;
}
+let cross_thread_reuse = rng.gen_bool(self.address_reuse_cross_thread_rate);
// Determine the pool to take this from.
let subpool = self.subpool(align);
// Let's see if we can find something of the right size. We want to find the full range of
-// such items, beginning with the first, so we can't use `binary_search_by_key`.
-let begin = subpool.partition_point(|(_addr, other_size, _)| *other_size < size);
+// such items, beginning with the first, so we can't use `binary_search_by_key`. If we do
+// *not* want to consider other thread's allocations, we effectively use the lexicographic
+// order on `(size, thread)`.
+let begin = subpool.partition_point(|(_addr, other_size, other_thread, _)| {
+*other_size < size
+|| (*other_size == size && !cross_thread_reuse && *other_thread < thread)
+});
let mut end = begin;
-while let Some((_addr, other_size, _)) = subpool.get(end) {
+while let Some((_addr, other_size, other_thread, _)) = subpool.get(end) {
if *other_size != size {
break;
}
+if !cross_thread_reuse && *other_thread != thread {
+// We entered the allocations of another thread.
+break;
+}
end += 1;
}
if end == begin {
@@ -99,8 +119,9 @@ impl ReusePool {
// Pick a random element with the desired size.
let idx = rng.gen_range(begin..end);
// Remove it from the pool and return.
-let (chosen_addr, chosen_size, clock) = subpool.remove(idx);
+let (chosen_addr, chosen_size, chosen_thread, clock) = subpool.remove(idx);
debug_assert!(chosen_size >= size && chosen_addr % align.bytes() == 0);
+debug_assert!(cross_thread_reuse || chosen_thread == thread);
Some((chosen_addr, clock))
}
}
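The lexicographic ordering is what keeps `take_addr` a single scan: each subpool is sorted by `(size, thread)`, so one `partition_point` finds the start of the candidate run whether or not cross-thread entries are admitted. A self-contained sketch of that lookup, with plain tuples standing in for Miri's types (names are illustrative):

```rust
type ThreadId = usize;

/// Entries sorted by (size, thread), mirroring the pool invariant above.
fn find_range(
    entries: &[(u64 /* addr */, u64 /* size */, ThreadId)],
    size: u64,
    thread: ThreadId,
    cross_thread: bool,
) -> std::ops::Range<usize> {
    let begin = entries.partition_point(|&(_, s, t)| {
        s < size || (s == size && !cross_thread && t < thread)
    });
    let mut end = begin;
    while let Some(&(_, s, t)) = entries.get(end) {
        if s != size || (!cross_thread && t != thread) {
            break; // past the matching sizes, or into another thread's entries
        }
        end += 1;
    }
    begin..end
}

fn main() {
    let entries = [(0xa000, 8, 0), (0xb000, 16, 0), (0xc000, 16, 1), (0xd000, 32, 2)];
    assert_eq!(find_range(&entries, 16, 1, false), 2..3); // own-thread only
    assert_eq!(find_range(&entries, 16, 1, true), 1..3); // cross-thread admits both
    println!("ok");
}
```

With `cross_thread = false` the predicate additionally skips same-size entries belonging to smaller thread IDs, and the forward scan stops at the first foreign entry, so the resulting range contains only the current thread's addresses.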
129 changes: 46 additions & 83 deletions src/bin/miri.rs
@@ -307,6 +307,15 @@ fn parse_comma_list<T: FromStr>(input: &str) -> Result<Vec<T>, T::Err> {
input.split(',').map(str::parse::<T>).collect()
}

+/// Parses the input as a float in the range from 0.0 to 1.0 (inclusive).
+fn parse_rate(input: &str) -> Result<f64, &'static str> {
+match input.parse::<f64>() {
+Ok(rate) if rate >= 0.0 && rate <= 1.0 => Ok(rate),
+Ok(_) => Err("must be between `0.0` and `1.0`"),
+Err(_) => Err("requires a `f64` between `0.0` and `1.0`"),
+}
+}
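The helper centralizes the validation that was previously copy-pasted (and, for `-Zmiri-address-reuse-rate`, mislabeled) per flag. Doctest-style checks of its contract, assuming the definition above:

```rust
assert_eq!(parse_rate("0.5"), Ok(0.5));
assert_eq!(parse_rate("1.0"), Ok(1.0));
assert_eq!(parse_rate("1.5"), Err("must be between `0.0` and `1.0`"));
assert_eq!(parse_rate("oops"), Err("requires a `f64` between `0.0` and `1.0`"));
```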

#[cfg(any(target_os = "linux", target_os = "macos"))]
fn jemalloc_magic() {
// These magic runes are copied from
@@ -499,14 +508,9 @@ fn main() {
} else if let Some(param) = arg.strip_prefix("-Zmiri-env-forward=") {
miri_config.forwarded_env_vars.push(param.to_owned());
} else if let Some(param) = arg.strip_prefix("-Zmiri-track-pointer-tag=") {
-let ids: Vec<u64> = match parse_comma_list(param) {
-Ok(ids) => ids,
-Err(err) =>
-show_error!(
-"-Zmiri-track-pointer-tag requires a comma separated list of valid `u64` arguments: {}",
-err
-),
-};
+let ids: Vec<u64> = parse_comma_list(param).unwrap_or_else(|err| {
+show_error!("-Zmiri-track-pointer-tag requires a comma separated list of valid `u64` arguments: {err}")
+});
for id in ids.into_iter().map(miri::BorTag::new) {
if let Some(id) = id {
miri_config.tracked_pointer_tags.insert(id);
@@ -515,14 +519,9 @@ fn main() {
}
}
} else if let Some(param) = arg.strip_prefix("-Zmiri-track-call-id=") {
-let ids: Vec<u64> = match parse_comma_list(param) {
-Ok(ids) => ids,
-Err(err) =>
-show_error!(
-"-Zmiri-track-call-id requires a comma separated list of valid `u64` arguments: {}",
-err
-),
-};
+let ids: Vec<u64> = parse_comma_list(param).unwrap_or_else(|err| {
+show_error!("-Zmiri-track-call-id requires a comma separated list of valid `u64` arguments: {err}")
+});
for id in ids.into_iter().map(miri::CallId::new) {
if let Some(id) = id {
miri_config.tracked_call_ids.insert(id);
@@ -531,70 +530,37 @@ fn main() {
}
}
} else if let Some(param) = arg.strip_prefix("-Zmiri-track-alloc-id=") {
-let ids: Vec<miri::AllocId> = match parse_comma_list::<NonZero<u64>>(param) {
-Ok(ids) => ids.into_iter().map(miri::AllocId).collect(),
-Err(err) =>
-show_error!(
-"-Zmiri-track-alloc-id requires a comma separated list of valid non-zero `u64` arguments: {}",
-err
-),
-};
-miri_config.tracked_alloc_ids.extend(ids);
+let ids = parse_comma_list::<NonZero<u64>>(param).unwrap_or_else(|err| {
+show_error!("-Zmiri-track-alloc-id requires a comma separated list of valid non-zero `u64` arguments: {err}")
+});
+miri_config.tracked_alloc_ids.extend(ids.into_iter().map(miri::AllocId));
} else if arg == "-Zmiri-track-alloc-accesses" {
miri_config.track_alloc_accesses = true;
} else if let Some(param) = arg.strip_prefix("-Zmiri-address-reuse-rate=") {
-let rate = match param.parse::<f64>() {
-Ok(rate) if rate >= 0.0 && rate <= 1.0 => rate,
-Ok(_) =>
-show_error!(
-"-Zmiri-compare-exchange-weak-failure-rate must be between `0.0` and `1.0`"
-),
-Err(err) =>
-show_error!(
-"-Zmiri-compare-exchange-weak-failure-rate requires a `f64` between `0.0` and `1.0`: {}",
-err
-),
-};
-miri_config.address_reuse_rate = rate;
+miri_config.address_reuse_rate = parse_rate(param)
+.unwrap_or_else(|err| show_error!("-Zmiri-address-reuse-rate {err}"));
+} else if let Some(param) = arg.strip_prefix("-Zmiri-address-reuse-cross-thread-rate=") {
+miri_config.address_reuse_cross_thread_rate = parse_rate(param)
+.unwrap_or_else(|err| show_error!("-Zmiri-address-reuse-cross-thread-rate {err}"));
} else if let Some(param) = arg.strip_prefix("-Zmiri-compare-exchange-weak-failure-rate=") {
-let rate = match param.parse::<f64>() {
-Ok(rate) if rate >= 0.0 && rate <= 1.0 => rate,
-Ok(_) =>
-show_error!(
-"-Zmiri-compare-exchange-weak-failure-rate must be between `0.0` and `1.0`"
-),
-Err(err) =>
-show_error!(
-"-Zmiri-compare-exchange-weak-failure-rate requires a `f64` between `0.0` and `1.0`: {}",
-err
-),
-};
-miri_config.cmpxchg_weak_failure_rate = rate;
+miri_config.cmpxchg_weak_failure_rate = parse_rate(param).unwrap_or_else(|err| {
+show_error!("-Zmiri-compare-exchange-weak-failure-rate {err}")
+});
} else if let Some(param) = arg.strip_prefix("-Zmiri-preemption-rate=") {
-let rate = match param.parse::<f64>() {
-Ok(rate) if rate >= 0.0 && rate <= 1.0 => rate,
-Ok(_) => show_error!("-Zmiri-preemption-rate must be between `0.0` and `1.0`"),
-Err(err) =>
-show_error!(
-"-Zmiri-preemption-rate requires a `f64` between `0.0` and `1.0`: {}",
-err
-),
-};
-miri_config.preemption_rate = rate;
+miri_config.preemption_rate =
+parse_rate(param).unwrap_or_else(|err| show_error!("-Zmiri-preemption-rate {err}"));
} else if arg == "-Zmiri-report-progress" {
// This makes it take a few seconds between progress reports on my laptop.
miri_config.report_progress = Some(1_000_000);
} else if let Some(param) = arg.strip_prefix("-Zmiri-report-progress=") {
-let interval = match param.parse::<u32>() {
-Ok(i) => i,
-Err(err) => show_error!("-Zmiri-report-progress requires a `u32`: {}", err),
-};
+let interval = param.parse::<u32>().unwrap_or_else(|err| {
+show_error!("-Zmiri-report-progress requires a `u32`: {}", err)
+});
miri_config.report_progress = Some(interval);
} else if let Some(param) = arg.strip_prefix("-Zmiri-provenance-gc=") {
-let interval = match param.parse::<u32>() {
-Ok(i) => i,
-Err(err) => show_error!("-Zmiri-provenance-gc requires a `u32`: {}", err),
-};
+let interval = param.parse::<u32>().unwrap_or_else(|err| {
+show_error!("-Zmiri-provenance-gc requires a `u32`: {}", err)
+});
miri_config.gc_interval = interval;
} else if let Some(param) = arg.strip_prefix("-Zmiri-measureme=") {
miri_config.measureme_out = Some(param.to_string());
@@ -619,23 +585,20 @@ fn main() {
show_error!("-Zmiri-extern-so-file `{}` does not exist", filename);
}
} else if let Some(param) = arg.strip_prefix("-Zmiri-num-cpus=") {
-let num_cpus = match param.parse::<u32>() {
-Ok(i) => i,
-Err(err) => show_error!("-Zmiri-num-cpus requires a `u32`: {}", err),
-};
-
+let num_cpus = param
+.parse::<u32>()
+.unwrap_or_else(|err| show_error!("-Zmiri-num-cpus requires a `u32`: {}", err));
miri_config.num_cpus = num_cpus;
} else if let Some(param) = arg.strip_prefix("-Zmiri-force-page-size=") {
-let page_size = match param.parse::<u64>() {
-Ok(i) =>
-if i.is_power_of_two() {
-i * 1024
-} else {
-show_error!("-Zmiri-force-page-size requires a power of 2: {}", i)
-},
-Err(err) => show_error!("-Zmiri-force-page-size requires a `u64`: {}", err),
-};
+let page_size = param.parse::<u64>().unwrap_or_else(|err| {
+show_error!("-Zmiri-force-page-size requires a `u64`: {}", err)
+});
+// Convert from kilobytes to bytes.
+let page_size = if page_size.is_power_of_two() {
+page_size * 1024
+} else {
+show_error!("-Zmiri-force-page-size requires a power of 2: {page_size}");
+};

miri_config.page_size = Some(page_size);
} else {
// Forward to rustc.
3 changes: 3 additions & 0 deletions src/eval.rs
@@ -152,6 +152,8 @@ pub struct MiriConfig {
pub collect_leak_backtraces: bool,
/// Probability for address reuse.
pub address_reuse_rate: f64,
+/// Probability for address reuse across threads.
+pub address_reuse_cross_thread_rate: f64,
}

impl Default for MiriConfig {
@@ -189,6 +191,7 @@ impl Default for MiriConfig {
page_size: None,
collect_leak_backtraces: true,
address_reuse_rate: 0.5,
+address_reuse_cross_thread_rate: 0.1,
}
}
}
2 changes: 2 additions & 0 deletions tests/fail/both_borrows/retag_data_race_write.rs
@@ -1,6 +1,8 @@
//! Make sure that a retag acts like a write for the data race model.
//@revisions: stack tree
//@compile-flags: -Zmiri-preemption-rate=0
+// Avoid accidental synchronization via address reuse inside `thread::spawn`.
+//@compile-flags: -Zmiri-address-reuse-cross-thread-rate=0
//@[tree]compile-flags: -Zmiri-tree-borrows
#[derive(Copy, Clone)]
struct SendPtr(*mut u8);
2 changes: 2 additions & 0 deletions tests/fail/data_race/alloc_read_race.rs
@@ -1,4 +1,6 @@
//@compile-flags: -Zmiri-disable-weak-memory-emulation -Zmiri-preemption-rate=0 -Zmiri-disable-stacked-borrows
+// Avoid accidental synchronization via address reuse inside `thread::spawn`.
+//@compile-flags: -Zmiri-address-reuse-cross-thread-rate=0
#![feature(new_uninit)]

use std::mem::MaybeUninit;
2 changes: 2 additions & 0 deletions tests/fail/data_race/alloc_write_race.rs
@@ -1,4 +1,6 @@
//@compile-flags: -Zmiri-disable-weak-memory-emulation -Zmiri-preemption-rate=0 -Zmiri-disable-stacked-borrows
+// Avoid accidental synchronization via address reuse inside `thread::spawn`.
+//@compile-flags: -Zmiri-address-reuse-cross-thread-rate=0
#![feature(new_uninit)]

use std::ptr::null_mut;
Loading