Skip to content

Commit 75cb8cd

Browse files
committed
Auto merge of rust-lang#125928 - michaelwoerister:fix-cgu-hashstable, r=<try>
Stabilize order of MonoItems in CGUs and disallow query_instability lint for rustc_monomorphize The HashStable impl for `CodegenUnit` was incorrect as described in [MCP 533](rust-lang/compiler-team#533). This PR removes any indeterminism from the way codegen units are built. The changes are pretty straightforward. Part of rust-lang#84447 and [MCP 533](rust-lang/compiler-team#533).
2 parents 8768db9 + 6cfdc57 commit 75cb8cd

File tree

4 files changed

+77
-77
lines changed

4 files changed

+77
-77
lines changed

compiler/rustc_middle/src/mir/mono.rs

+21-30
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@ use rustc_data_structures::base_n::BaseNString;
55
use rustc_data_structures::base_n::ToBaseN;
66
use rustc_data_structures::base_n::CASE_INSENSITIVE;
77
use rustc_data_structures::fingerprint::Fingerprint;
8-
use rustc_data_structures::fx::FxHashMap;
98
use rustc_data_structures::fx::FxIndexMap;
10-
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
9+
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher, ToStableHashKey};
10+
use rustc_data_structures::unord::UnordMap;
1111
use rustc_hir::def_id::{CrateNum, DefId, LOCAL_CRATE};
1212
use rustc_hir::ItemId;
1313
use rustc_index::Idx;
@@ -241,7 +241,17 @@ impl<'tcx> fmt::Display for MonoItem<'tcx> {
241241
}
242242
}
243243

244-
#[derive(Debug)]
244+
impl ToStableHashKey<StableHashingContext<'_>> for MonoItem<'_> {
245+
type KeyType = Fingerprint;
246+
247+
fn to_stable_hash_key(&self, hcx: &StableHashingContext<'_>) -> Self::KeyType {
248+
let mut hasher = StableHasher::new();
249+
self.hash_stable(&mut hcx.clone(), &mut hasher);
250+
hasher.finish()
251+
}
252+
}
253+
254+
#[derive(Debug, HashStable)]
245255
pub struct CodegenUnit<'tcx> {
246256
/// A name for this CGU. Incremental compilation requires that
247257
/// name be unique amongst **all** crates. Therefore, it should
@@ -430,38 +440,19 @@ impl<'tcx> CodegenUnit<'tcx> {
430440
}
431441
}
432442

433-
impl<'a, 'tcx> HashStable<StableHashingContext<'a>> for CodegenUnit<'tcx> {
434-
fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHasher) {
435-
let CodegenUnit {
436-
ref items,
437-
name,
438-
// The size estimate is not relevant to the hash
439-
size_estimate: _,
440-
primary: _,
441-
is_code_coverage_dead_code_cgu,
442-
} = *self;
443-
444-
name.hash_stable(hcx, hasher);
445-
is_code_coverage_dead_code_cgu.hash_stable(hcx, hasher);
446-
447-
let mut items: Vec<(Fingerprint, _)> = items
448-
.iter()
449-
.map(|(mono_item, &attrs)| {
450-
let mut hasher = StableHasher::new();
451-
mono_item.hash_stable(hcx, &mut hasher);
452-
let mono_item_fingerprint = hasher.finish();
453-
(mono_item_fingerprint, attrs)
454-
})
455-
.collect();
456-
457-
items.sort_unstable_by_key(|i| i.0);
458-
items.hash_stable(hcx, hasher);
443+
impl ToStableHashKey<StableHashingContext<'_>> for CodegenUnit<'_> {
444+
type KeyType = String;
445+
446+
fn to_stable_hash_key(&self, _: &StableHashingContext<'_>) -> Self::KeyType {
447+
// Codegen unit names are conceptually required to be stable across
448+
// compilation session so that object file names match up.
449+
self.name.to_string()
459450
}
460451
}
461452

462453
pub struct CodegenUnitNameBuilder<'tcx> {
463454
tcx: TyCtxt<'tcx>,
464-
cache: FxHashMap<CrateNum, String>,
455+
cache: UnordMap<CrateNum, String>,
465456
}
466457

467458
impl<'tcx> CodegenUnitNameBuilder<'tcx> {

compiler/rustc_monomorphize/src/collector.rs

+20-14
Original file line numberDiff line numberDiff line change
@@ -207,8 +207,8 @@
207207
208208
mod move_check;
209209

210-
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
211210
use rustc_data_structures::sync::{par_for_each_in, LRef, MTLock};
211+
use rustc_data_structures::unord::{UnordMap, UnordSet};
212212
use rustc_hir as hir;
213213
use rustc_hir::def::DefKind;
214214
use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId};
@@ -251,21 +251,21 @@ pub enum MonoItemCollectionStrategy {
251251

252252
pub struct UsageMap<'tcx> {
253253
// Maps every mono item to the mono items used by it.
254-
used_map: FxHashMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
254+
used_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
255255

256256
// Maps every mono item to the mono items that use it.
257-
user_map: FxHashMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
257+
user_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
258258
}
259259

260260
type MonoItems<'tcx> = Vec<Spanned<MonoItem<'tcx>>>;
261261

262262
/// The state that is shared across the concurrent threads that are doing collection.
263263
struct SharedState<'tcx> {
264264
/// Items that have been or are currently being recursively collected.
265-
visited: MTLock<FxHashSet<MonoItem<'tcx>>>,
265+
visited: MTLock<UnordSet<MonoItem<'tcx>>>,
266266
/// Items that have been or are currently being recursively treated as "mentioned", i.e., their
267267
/// consts are evaluated but nothing is added to the collection.
268-
mentioned: MTLock<FxHashSet<MonoItem<'tcx>>>,
268+
mentioned: MTLock<UnordSet<MonoItem<'tcx>>>,
269269
/// Which items are being used where, for better errors.
270270
usage_map: MTLock<UsageMap<'tcx>>,
271271
}
@@ -290,7 +290,7 @@ enum CollectionMode {
290290

291291
impl<'tcx> UsageMap<'tcx> {
292292
fn new() -> UsageMap<'tcx> {
293-
UsageMap { used_map: FxHashMap::default(), user_map: FxHashMap::default() }
293+
UsageMap { used_map: Default::default(), user_map: Default::default() }
294294
}
295295

296296
fn record_used<'a>(
@@ -668,7 +668,7 @@ struct MirUsedCollector<'a, 'tcx> {
668668
used_items: &'a mut MonoItems<'tcx>,
669669
/// See the comment in `collect_items_of_instance` for the purpose of this set.
670670
/// Note that this contains *not-monomorphized* items!
671-
used_mentioned_items: &'a mut FxHashSet<MentionedItem<'tcx>>,
671+
used_mentioned_items: &'a mut UnordSet<MentionedItem<'tcx>>,
672672
instance: Instance<'tcx>,
673673
visiting_call_terminator: bool,
674674
move_check: move_check::MoveCheckState,
@@ -1272,7 +1272,7 @@ fn collect_items_of_instance<'tcx>(
12721272
// mentioned item. So instead we collect all pre-monomorphized `MentionedItem` that were already
12731273
// added to `used_items` in a hash set, which can efficiently query in the
12741274
// `body.mentioned_items` loop below without even having to monomorphize the item.
1275-
let mut used_mentioned_items = FxHashSet::<MentionedItem<'tcx>>::default();
1275+
let mut used_mentioned_items = Default::default();
12761276
let mut collector = MirUsedCollector {
12771277
tcx,
12781278
body,
@@ -1628,10 +1628,10 @@ fn create_mono_items_for_default_impls<'tcx>(
16281628
//=-----------------------------------------------------------------------------
16291629

16301630
#[instrument(skip(tcx, strategy), level = "debug")]
1631-
pub fn collect_crate_mono_items(
1632-
tcx: TyCtxt<'_>,
1631+
pub(crate) fn collect_crate_mono_items<'tcx>(
1632+
tcx: TyCtxt<'tcx>,
16331633
strategy: MonoItemCollectionStrategy,
1634-
) -> (FxHashSet<MonoItem<'_>>, UsageMap<'_>) {
1634+
) -> (Vec<MonoItem<'tcx>>, UsageMap<'tcx>) {
16351635
let _prof_timer = tcx.prof.generic_activity("monomorphization_collector");
16361636

16371637
let roots = tcx
@@ -1641,8 +1641,8 @@ pub fn collect_crate_mono_items(
16411641
debug!("building mono item graph, beginning at roots");
16421642

16431643
let mut state = SharedState {
1644-
visited: MTLock::new(FxHashSet::default()),
1645-
mentioned: MTLock::new(FxHashSet::default()),
1644+
visited: MTLock::new(UnordSet::default()),
1645+
mentioned: MTLock::new(UnordSet::default()),
16461646
usage_map: MTLock::new(UsageMap::new()),
16471647
};
16481648
let recursion_limit = tcx.recursion_limit();
@@ -1665,5 +1665,11 @@ pub fn collect_crate_mono_items(
16651665
});
16661666
}
16671667

1668-
(state.visited.into_inner(), state.usage_map.into_inner())
1668+
// The set of MonoItems was created in an inherently indeterministic order because
1669+
// of parallelism. We sort it here to ensure that the output is deterministic.
1670+
let mono_items = tcx.with_stable_hashing_context(move |ref hcx| {
1671+
state.visited.into_inner().into_sorted(hcx, true)
1672+
});
1673+
1674+
(mono_items, state.usage_map.into_inner())
16691675
}

compiler/rustc_monomorphize/src/lib.rs

-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#![feature(array_windows)]
22
#![feature(is_sorted)]
3-
#![allow(rustc::potential_query_instability)]
43

54
use rustc_hir::lang_items::LangItem;
65
use rustc_middle::bug;

compiler/rustc_monomorphize/src/partitioning.rs

+36-32
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,9 @@ use std::fs::{self, File};
9898
use std::io::{BufWriter, Write};
9999
use std::path::{Path, PathBuf};
100100

101-
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
101+
use rustc_data_structures::fx::{FxIndexMap, FxIndexSet};
102102
use rustc_data_structures::sync;
103+
use rustc_data_structures::unord::{UnordMap, UnordSet};
103104
use rustc_hir::def::DefKind;
104105
use rustc_hir::def_id::{DefId, DefIdSet, LOCAL_CRATE};
105106
use rustc_hir::definitions::DefPathDataName;
@@ -131,7 +132,7 @@ struct PlacedMonoItems<'tcx> {
131132
/// The codegen units, sorted by name to make things deterministic.
132133
codegen_units: Vec<CodegenUnit<'tcx>>,
133134

134-
internalization_candidates: FxHashSet<MonoItem<'tcx>>,
135+
internalization_candidates: UnordSet<MonoItem<'tcx>>,
135136
}
136137

137138
// The output CGUs are sorted by name.
@@ -197,9 +198,9 @@ fn place_mono_items<'tcx, I>(cx: &PartitioningCx<'_, 'tcx>, mono_items: I) -> Pl
197198
where
198199
I: Iterator<Item = MonoItem<'tcx>>,
199200
{
200-
let mut codegen_units = FxHashMap::default();
201+
let mut codegen_units = UnordMap::default();
201202
let is_incremental_build = cx.tcx.sess.opts.incremental.is_some();
202-
let mut internalization_candidates = FxHashSet::default();
203+
let mut internalization_candidates = UnordSet::default();
203204

204205
// Determine if monomorphizations instantiated in this crate will be made
205206
// available to downstream crates. This depends on whether we are in
@@ -209,7 +210,7 @@ where
209210
cx.tcx.sess.opts.share_generics() && cx.tcx.local_crate_exports_generics();
210211

211212
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
212-
let cgu_name_cache = &mut FxHashMap::default();
213+
let cgu_name_cache = &mut UnordMap::default();
213214

214215
for mono_item in mono_items {
215216
// Handle only root (GloballyShared) items directly here. Inlined (LocalCopy) items
@@ -260,7 +261,7 @@ where
260261
// going via another root item. This includes drop-glue, functions from
261262
// external crates, and local functions the definition of which is
262263
// marked with `#[inline]`.
263-
let mut reachable_inlined_items = FxHashSet::default();
264+
let mut reachable_inlined_items = FxIndexSet::default();
264265
get_reachable_inlined_items(cx.tcx, mono_item, cx.usage_map, &mut reachable_inlined_items);
265266

266267
// Add those inlined items. It's possible an inlined item is reachable
@@ -284,8 +285,9 @@ where
284285
codegen_units.insert(cgu_name, CodegenUnit::new(cgu_name));
285286
}
286287

287-
let mut codegen_units: Vec<_> = codegen_units.into_values().collect();
288-
codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str()));
288+
let mut codegen_units: Vec<_> = cx.tcx.with_stable_hashing_context(|ref hcx| {
289+
codegen_units.into_items().map(|(_, cgu)| cgu).collect_sorted(hcx, true)
290+
});
289291

290292
for cgu in codegen_units.iter_mut() {
291293
cgu.compute_size_estimate();
@@ -297,7 +299,7 @@ where
297299
tcx: TyCtxt<'tcx>,
298300
item: MonoItem<'tcx>,
299301
usage_map: &UsageMap<'tcx>,
300-
visited: &mut FxHashSet<MonoItem<'tcx>>,
302+
visited: &mut FxIndexSet<MonoItem<'tcx>>,
301303
) {
302304
usage_map.for_each_inlined_used_item(tcx, item, |inlined_item| {
303305
let is_new = visited.insert(inlined_item);
@@ -320,7 +322,7 @@ fn merge_codegen_units<'tcx>(
320322
assert!(codegen_units.is_sorted_by(|a, b| a.name().as_str() <= b.name().as_str()));
321323

322324
// This map keeps track of what got merged into what.
323-
let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> =
325+
let mut cgu_contents: UnordMap<Symbol, Vec<Symbol>> =
324326
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
325327

326328
// If N is the maximum number of CGUs, and the CGUs are sorted from largest
@@ -422,22 +424,24 @@ fn merge_codegen_units<'tcx>(
422424
// For CGUs that contain the code of multiple modules because of the
423425
// merging done above, we use a concatenation of the names of all
424426
// contained CGUs.
425-
let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents
426-
.into_iter()
427-
// This `filter` makes sure we only update the name of CGUs that
428-
// were actually modified by merging.
429-
.filter(|(_, cgu_contents)| cgu_contents.len() > 1)
430-
.map(|(current_cgu_name, cgu_contents)| {
431-
let mut cgu_contents: Vec<&str> = cgu_contents.iter().map(|s| s.as_str()).collect();
432-
433-
// Sort the names, so things are deterministic and easy to
434-
// predict. We are sorting primitive `&str`s here so we can
435-
// use unstable sort.
436-
cgu_contents.sort_unstable();
437-
438-
(current_cgu_name, cgu_contents.join("--"))
439-
})
440-
.collect();
427+
let new_cgu_names = UnordMap::from(
428+
cgu_contents
429+
.items()
430+
// This `filter` makes sure we only update the name of CGUs that
431+
// were actually modified by merging.
432+
.filter(|(_, cgu_contents)| cgu_contents.len() > 1)
433+
.map(|(current_cgu_name, cgu_contents)| {
434+
let mut cgu_contents: Vec<&str> =
435+
cgu_contents.iter().map(|s| s.as_str()).collect();
436+
437+
// Sort the names, so things are deterministic and easy to
438+
// predict. We are sorting primitive `&str`s here so we can
439+
// use unstable sort.
440+
cgu_contents.sort_unstable();
441+
442+
(*current_cgu_name, cgu_contents.join("--"))
443+
}),
444+
);
441445

442446
for cgu in codegen_units.iter_mut() {
443447
if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
@@ -511,7 +515,7 @@ fn compute_inlined_overlap<'tcx>(cgu1: &CodegenUnit<'tcx>, cgu2: &CodegenUnit<'t
511515
fn internalize_symbols<'tcx>(
512516
cx: &PartitioningCx<'_, 'tcx>,
513517
codegen_units: &mut [CodegenUnit<'tcx>],
514-
internalization_candidates: FxHashSet<MonoItem<'tcx>>,
518+
internalization_candidates: UnordSet<MonoItem<'tcx>>,
515519
) {
516520
/// For symbol internalization, we need to know whether a symbol/mono-item
517521
/// is used from outside the codegen unit it is defined in. This type is
@@ -522,7 +526,7 @@ fn internalize_symbols<'tcx>(
522526
MultipleCgus,
523527
}
524528

525-
let mut mono_item_placements = FxHashMap::default();
529+
let mut mono_item_placements = UnordMap::default();
526530
let single_codegen_unit = codegen_units.len() == 1;
527531

528532
if !single_codegen_unit {
@@ -739,7 +743,7 @@ fn mono_item_linkage_and_visibility<'tcx>(
739743
(Linkage::External, vis)
740744
}
741745

742-
type CguNameCache = FxHashMap<(DefId, bool), Symbol>;
746+
type CguNameCache = UnordMap<(DefId, bool), Symbol>;
743747

744748
fn static_visibility<'tcx>(
745749
tcx: TyCtxt<'tcx>,
@@ -932,7 +936,7 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
932936
//
933937
// Also, unreached inlined items won't be counted here. This is fine.
934938

935-
let mut inlined_items = FxHashSet::default();
939+
let mut inlined_items = UnordSet::default();
936940

937941
let mut root_items = 0;
938942
let mut unique_inlined_items = 0;
@@ -1164,7 +1168,7 @@ fn collect_and_partition_mono_items(tcx: TyCtxt<'_>, (): ()) -> (&DefIdSet, &[Co
11641168
}
11651169

11661170
if tcx.sess.opts.unstable_opts.print_mono_items.is_some() {
1167-
let mut item_to_cgus: FxHashMap<_, Vec<_>> = Default::default();
1171+
let mut item_to_cgus: UnordMap<_, Vec<_>> = Default::default();
11681172

11691173
for cgu in codegen_units {
11701174
for (&mono_item, &data) in cgu.items() {
@@ -1240,7 +1244,7 @@ fn dump_mono_items_stats<'tcx>(
12401244
let mut file = BufWriter::new(file);
12411245

12421246
// Gather instantiated mono items grouped by def_id
1243-
let mut items_per_def_id: FxHashMap<_, Vec<_>> = Default::default();
1247+
let mut items_per_def_id: FxIndexMap<_, Vec<_>> = Default::default();
12441248
for cgu in codegen_units {
12451249
cgu.items()
12461250
.keys()

0 commit comments

Comments
 (0)