From 7f1231c986c26dcd1b519b81b78371a750db797a Mon Sep 17 00:00:00 2001 From: Mark Rousskov <mark.simulacrum@gmail.com> Date: Sun, 26 Jan 2025 20:47:49 -0500 Subject: [PATCH] Shard AllocMap Lock This improves performance on many-seed parallel (-Zthreads=32) miri executions from managing to use ~8 cores to using 27-28 cores. That's pretty reasonable scaling for the simplicity of this solution. --- .../rustc_middle/src/mir/interpret/mod.rs | 66 ++++++++++++------- compiler/rustc_middle/src/ty/context.rs | 4 +- 2 files changed, 44 insertions(+), 26 deletions(-) diff --git a/compiler/rustc_middle/src/mir/interpret/mod.rs b/compiler/rustc_middle/src/mir/interpret/mod.rs index f4f9f221a7511..45c862e0d34bf 100644 --- a/compiler/rustc_middle/src/mir/interpret/mod.rs +++ b/compiler/rustc_middle/src/mir/interpret/mod.rs @@ -15,7 +15,8 @@ use std::{fmt, io}; use rustc_abi::{AddressSpace, Align, Endian, HasDataLayout, Size}; use rustc_ast::{LitKind, Mutability}; use rustc_data_structures::fx::FxHashMap; -use rustc_data_structures::sync::Lock; +use rustc_data_structures::sharded::ShardedHashMap; +use rustc_data_structures::sync::{AtomicU64, Lock}; use rustc_hir::def::DefKind; use rustc_hir::def_id::{DefId, LocalDefId}; use rustc_macros::{HashStable, TyDecodable, TyEncodable, TypeFoldable, TypeVisitable}; @@ -389,35 +390,39 @@ pub const CTFE_ALLOC_SALT: usize = 0; pub(crate) struct AllocMap<'tcx> { /// Maps `AllocId`s to their corresponding allocations. - alloc_map: FxHashMap<AllocId, GlobalAlloc<'tcx>>, + // Note that this map on rustc workloads seems to be rather dense, but in miri workloads should + // be pretty sparse. In #136105 we considered replacing it with a (dense) Vec-based map, but + // since there are workloads where it can be sparse we decided to go with sharding for now. At + // least up to 32 cores the one workload tested didn't exhibit much difference between the two. + // + // Should be locked *after* locking dedup if locking both to avoid deadlocks. + to_alloc: ShardedHashMap<AllocId, GlobalAlloc<'tcx>>, /// Used to deduplicate global allocations: functions, vtables, string literals, ... /// /// The `usize` is a "salt" used by Miri to make deduplication imperfect, thus better emulating /// the actual guarantees. - dedup: FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>, + dedup: Lock<FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>>, /// The `AllocId` to assign to the next requested ID. /// Always incremented; never gets smaller. - next_id: AllocId, + next_id: AtomicU64, } impl<'tcx> AllocMap<'tcx> { pub(crate) fn new() -> Self { AllocMap { - alloc_map: Default::default(), + to_alloc: Default::default(), dedup: Default::default(), - next_id: AllocId(NonZero::new(1).unwrap()), + next_id: AtomicU64::new(1), } } - fn reserve(&mut self) -> AllocId { - let next = self.next_id; - self.next_id.0 = self.next_id.0.checked_add(1).expect( - "You overflowed a u64 by incrementing by 1... \ - You've just earned yourself a free drink if we ever meet. \ - Seriously, how did you do that?!", - ); - next + fn reserve(&self) -> AllocId { + // Technically there is a window here where we overflow and then another thread + // increments `next_id` *again* and uses it before we panic and tear down the entire session. + // We consider this fine since such overflows cannot realistically occur. + let next_id = self.next_id.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + AllocId(NonZero::new(next_id).unwrap()) } } @@ -428,26 +433,34 @@ impl<'tcx> TyCtxt<'tcx> { /// Make sure to call `set_alloc_id_memory` or `set_alloc_id_same_memory` before returning such /// an `AllocId` from a query. pub fn reserve_alloc_id(self) -> AllocId { - self.alloc_map.lock().reserve() + self.alloc_map.reserve() } /// Reserves a new ID *if* this allocation has not been dedup-reserved before. /// Should not be used for mutable memory. fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>, salt: usize) -> AllocId { - let mut alloc_map = self.alloc_map.lock(); if let GlobalAlloc::Memory(mem) = alloc { if mem.inner().mutability.is_mut() { bug!("trying to dedup-reserve mutable memory"); } } let alloc_salt = (alloc, salt); - if let Some(&alloc_id) = alloc_map.dedup.get(&alloc_salt) { + // Locking this *before* `to_alloc` also to ensure correct lock order. + let mut dedup = self.alloc_map.dedup.lock(); + if let Some(&alloc_id) = dedup.get(&alloc_salt) { return alloc_id; } - let id = alloc_map.reserve(); + let id = self.alloc_map.reserve(); debug!("creating alloc {:?} with id {id:?}", alloc_salt.0); - alloc_map.alloc_map.insert(id, alloc_salt.0.clone()); - alloc_map.dedup.insert(alloc_salt, id); + let had_previous = self + .alloc_map + .to_alloc + .lock_shard_by_value(&id) + .insert(id, alloc_salt.0.clone()) + .is_some(); + // We just reserved, so should always be unique. + assert!(!had_previous); + dedup.insert(alloc_salt, id); id } @@ -497,7 +510,7 @@ impl<'tcx> TyCtxt<'tcx> { /// local dangling pointers and allocations in constants/statics. #[inline] pub fn try_get_global_alloc(self, id: AllocId) -> Option<GlobalAlloc<'tcx>> { - self.alloc_map.lock().alloc_map.get(&id).cloned() + self.alloc_map.to_alloc.lock_shard_by_value(&id).get(&id).cloned() } #[inline] @@ -516,7 +529,9 @@ impl<'tcx> TyCtxt<'tcx> { /// Freezes an `AllocId` created with `reserve` by pointing it at an `Allocation`. Trying to /// call this function twice, even with the same `Allocation` will ICE the compiler. pub fn set_alloc_id_memory(self, id: AllocId, mem: ConstAllocation<'tcx>) { - if let Some(old) = self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Memory(mem)) { + if let Some(old) = + self.alloc_map.to_alloc.lock_shard_by_value(&id).insert(id, GlobalAlloc::Memory(mem)) + { bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}"); } } @@ -524,8 +539,11 @@ impl<'tcx> TyCtxt<'tcx> { /// Freezes an `AllocId` created with `reserve` by pointing it at a static item. Trying to /// call this function twice, even with the same `DefId` will ICE the compiler. pub fn set_nested_alloc_id_static(self, id: AllocId, def_id: LocalDefId) { - if let Some(old) = - self.alloc_map.lock().alloc_map.insert(id, GlobalAlloc::Static(def_id.to_def_id())) + if let Some(old) = self + .alloc_map + .to_alloc + .lock_shard_by_value(&id) + .insert(id, GlobalAlloc::Static(def_id.to_def_id())) { bug!("tried to set allocation ID {id:?}, but it was already existing as {old:#?}"); } diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index 69f6fc0ad8a66..0f1fc891b0f96 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -1366,7 +1366,7 @@ pub struct GlobalCtxt<'tcx> { pub data_layout: TargetDataLayout, /// Stores memory for globals (statics/consts). - pub(crate) alloc_map: Lock<interpret::AllocMap<'tcx>>, + pub(crate) alloc_map: interpret::AllocMap<'tcx>, current_gcx: CurrentGcx, } @@ -1583,7 +1583,7 @@ impl<'tcx> TyCtxt<'tcx> { new_solver_evaluation_cache: Default::default(), canonical_param_env_cache: Default::default(), data_layout, - alloc_map: Lock::new(interpret::AllocMap::new()), + alloc_map: interpret::AllocMap::new(), current_gcx, });