Skip to content

Commit 3540f93

Browse files
committed
Add disambiugator to ExpnData
Due to macro expansion, its possible to end up with two distinct `ExpnId`s that have the same `ExpnData` contents. This violates the contract of `HashStable`, since two unequal `ExpnId`s will end up with equal `Fingerprint`s. This commit adds a `disambiguator` field to `ExpnData`, which is used to force two otherwise-equivalent `ExpnData`s to be distinct.
1 parent 4d0dd02 commit 3540f93

File tree

3 files changed

+173
-14
lines changed

3 files changed

+173
-14
lines changed

compiler/rustc_middle/src/ich/hcx.rs

+8
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use rustc_span::{BytePos, CachingSourceMapView, SourceFile, SpanData};
1717
use rustc_span::def_id::{CrateNum, CRATE_DEF_INDEX};
1818
use smallvec::SmallVec;
1919
use std::cmp::Ord;
20+
use std::thread::LocalKey;
2021

2122
fn compute_ignored_attr_names() -> FxHashSet<Symbol> {
2223
debug_assert!(!ich::IGNORED_ATTRIBUTES.is_empty());
@@ -242,6 +243,13 @@ impl<'a> rustc_span::HashStableContext for StableHashingContext<'a> {
242243
hcx.def_path_hash(def_id).hash_stable(hcx, hasher);
243244
}
244245

246+
fn expn_id_cache() -> &'static LocalKey<rustc_span::ExpnIdCache> {
247+
thread_local! {
248+
static CACHE: rustc_span::ExpnIdCache = Default::default();
249+
}
250+
&CACHE
251+
}
252+
245253
fn byte_pos_to_line_and_col(
246254
&mut self,
247255
byte: BytePos,

compiler/rustc_span/src/hygiene.rs

+152-5
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,18 @@
2727
use crate::edition::Edition;
2828
use crate::symbol::{kw, sym, Symbol};
2929
use crate::SESSION_GLOBALS;
30-
use crate::{Span, DUMMY_SP};
30+
use crate::{BytePos, CachingSourceMapView, ExpnIdCache, SourceFile, Span, DUMMY_SP};
3131

3232
use crate::def_id::{CrateNum, DefId, CRATE_DEF_INDEX, LOCAL_CRATE};
33+
use rustc_data_structures::fingerprint::Fingerprint;
3334
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
35+
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
3436
use rustc_data_structures::sync::{Lock, Lrc};
3537
use rustc_macros::HashStable_Generic;
3638
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
3739
use std::fmt;
40+
use std::hash::Hash;
41+
use std::thread::LocalKey;
3842
use tracing::*;
3943

4044
/// A `SyntaxContext` represents a chain of pairs `(ExpnId, Transparency)` named "marks".
@@ -80,7 +84,12 @@ pub enum Transparency {
8084

8185
impl ExpnId {
8286
pub fn fresh(expn_data: Option<ExpnData>) -> Self {
83-
HygieneData::with(|data| data.fresh_expn(expn_data))
87+
let has_data = expn_data.is_some();
88+
let expn_id = HygieneData::with(|data| data.fresh_expn(expn_data));
89+
if has_data {
90+
update_disambiguator(expn_id);
91+
}
92+
expn_id
8493
}
8594

8695
/// The ID of the theoretical expansion that generates freshly parsed, unexpanded AST.
@@ -111,7 +120,8 @@ impl ExpnId {
111120
assert!(old_expn_data.is_none(), "expansion data is reset for an expansion ID");
112121
expn_data.orig_id.replace(self.as_u32()).expect_none("orig_id should be None");
113122
*old_expn_data = Some(expn_data);
114-
})
123+
});
124+
update_disambiguator(self)
115125
}
116126

117127
pub fn is_descendant_of(self, ancestor: ExpnId) -> bool {
@@ -152,6 +162,12 @@ pub struct HygieneData {
152162
expn_data: Vec<Option<ExpnData>>,
153163
syntax_context_data: Vec<SyntaxContextData>,
154164
syntax_context_map: FxHashMap<(SyntaxContext, ExpnId, Transparency), SyntaxContext>,
165+
/// Maps the `Fingerprint` of an `ExpnData` to the next disambiguator value.
166+
/// This is used by `update_disambiguator` to keep track of which `ExpnData`s
167+
/// would have collisions without a disambiguator.
168+
/// The keys of this map are always computed with `ExpnData.disambiguator`
169+
/// set to 0.
170+
expn_data_disambiguators: FxHashMap<Fingerprint, u32>,
155171
}
156172

157173
impl HygieneData {
@@ -175,6 +191,7 @@ impl HygieneData {
175191
dollar_crate_name: kw::DollarCrate,
176192
}],
177193
syntax_context_map: FxHashMap::default(),
194+
expn_data_disambiguators: FxHashMap::default(),
178195
}
179196
}
180197

@@ -649,8 +666,8 @@ impl Span {
649666
expn_data: ExpnData,
650667
transparency: Transparency,
651668
) -> Span {
669+
let expn_id = ExpnId::fresh(Some(expn_data));
652670
HygieneData::with(|data| {
653-
let expn_id = data.fresh_expn(Some(expn_data));
654671
self.with_ctxt(data.apply_mark(SyntaxContext::root(), expn_id, transparency))
655672
})
656673
}
@@ -726,10 +743,23 @@ pub struct ExpnData {
726743
// be considered equivalent.
727744
#[stable_hasher(ignore)]
728745
orig_id: Option<u32>,
746+
747+
/// Used to force two `ExpnData`s to have different `Fingerprint`s.
748+
/// Due to macro expansion, it's possible to end up with two `ExpnId`s
749+
/// that have identical `ExpnData`s. This violates the constract of `HashStable`
750+
/// - the two `ExpnId`s are not equal, but their `Fingerprint`s are equal
751+
/// (since the numerical `ExpnId` value is not considered by the `HashStable`
752+
/// implementation).
753+
///
754+
/// The `disambiguator` field is set by `update_disambiguator` when two distinct
755+
/// `ExpnId`s would end up with the same `Fingerprint`. Since `ExpnData` includes
756+
/// a `krate` field, this value only needs to be unique within a single crate.
757+
disambiguator: u32,
729758
}
730759

731-
// This would require special handling of `orig_id` and `parent`
760+
// These would require special handling of `orig_id`.
732761
impl !PartialEq for ExpnData {}
762+
impl !Hash for ExpnData {}
733763

734764
impl ExpnData {
735765
pub fn new(
@@ -755,6 +785,7 @@ impl ExpnData {
755785
macro_def_id,
756786
krate: LOCAL_CRATE,
757787
orig_id: None,
788+
disambiguator: 0,
758789
}
759790
}
760791

@@ -777,6 +808,7 @@ impl ExpnData {
777808
macro_def_id,
778809
krate: LOCAL_CRATE,
779810
orig_id: None,
811+
disambiguator: 0,
780812
}
781813
}
782814

@@ -1276,3 +1308,118 @@ impl<D: Decoder> Decodable<D> for SyntaxContext {
12761308
panic!("cannot decode `SyntaxContext` with `{}`", std::any::type_name::<D>());
12771309
}
12781310
}
1311+
1312+
/// Updates the `disambiguator` field of the corresponding `ExpnData`
1313+
/// such that the `Fingerprint` of the `ExpnData` does not collide with
1314+
/// any other `ExpnIds`.
1315+
///
1316+
/// This method is called only when an `ExpnData` is first associated
1317+
/// with an `ExpnId` (when the `ExpnId` is initially constructed, or via
1318+
/// `set_expn_data`). It is *not* called for foreign `ExpnId`s deserialized
1319+
/// from another crate's metadata - since `ExpnData` includes a `krate` field,
1320+
/// collisions are only possible between `ExpnId`s within the same crate.
1321+
fn update_disambiguator(expn_id: ExpnId) {
1322+
/// A `HashStableContext` which hashes the raw id values for `DefId`
1323+
/// and `CrateNum`, rather than using their computed stable hash.
1324+
///
1325+
/// This allows us to use the `HashStable` implementation on `ExpnId`
1326+
/// early on in compilation, before we've constructed a `TyCtxt`.
1327+
/// The `Fingerprint`s created by this context are not 'stable', since
1328+
/// the raw `CrateNum` and `DefId` values for an item may change between
1329+
/// sessions due to unrelated changes (e.g. adding/removing an different item).
1330+
///
1331+
/// However, this is fine for our purposes - we only need to detect
1332+
/// when two `ExpnData`s have the same `Fingerprint`. Since the hashes produced
1333+
/// by this context still obey the properties of `HashStable`, we have
1334+
/// that
1335+
/// `hash_stable(expn1, DummyHashStableContext) == hash_stable(expn2, DummyHashStableContext)`
1336+
/// iff `hash_stable(expn1, StableHashingContext) == hash_stable(expn2, StableHasingContext)`.
1337+
///
1338+
/// This is sufficient for determining when we need to update the disambiguator.
1339+
struct DummyHashStableContext<'a> {
1340+
caching_source_map: CachingSourceMapView<'a>,
1341+
}
1342+
1343+
impl<'a> crate::HashStableContext for DummyHashStableContext<'a> {
1344+
fn hash_def_id(&mut self, def_id: DefId, hasher: &mut StableHasher) {
1345+
def_id.krate.as_u32().hash_stable(self, hasher);
1346+
def_id.index.as_u32().hash_stable(self, hasher);
1347+
}
1348+
1349+
fn expn_id_cache() -> &'static LocalKey<ExpnIdCache> {
1350+
// This cache is only used by `DummyHashStableContext`,
1351+
// so we won't pollute the cache values of the normal `StableHashingContext`
1352+
thread_local! {
1353+
static CACHE: ExpnIdCache = Default::default();
1354+
}
1355+
1356+
&CACHE
1357+
}
1358+
1359+
fn hash_crate_num(&mut self, krate: CrateNum, hasher: &mut StableHasher) {
1360+
krate.as_u32().hash_stable(self, hasher);
1361+
}
1362+
fn hash_spans(&self) -> bool {
1363+
true
1364+
}
1365+
fn byte_pos_to_line_and_col(
1366+
&mut self,
1367+
byte: BytePos,
1368+
) -> Option<(Lrc<SourceFile>, usize, BytePos)> {
1369+
self.caching_source_map.byte_pos_to_line_and_col(byte)
1370+
}
1371+
fn span_data_to_lines_and_cols(
1372+
&mut self,
1373+
span: &crate::SpanData,
1374+
) -> Option<(Lrc<SourceFile>, usize, BytePos, usize, BytePos)> {
1375+
self.caching_source_map.span_data_to_lines_and_cols(span)
1376+
}
1377+
}
1378+
1379+
let source_map = SESSION_GLOBALS
1380+
.with(|session_globals| session_globals.source_map.borrow().as_ref().unwrap().clone());
1381+
1382+
let mut ctx =
1383+
DummyHashStableContext { caching_source_map: CachingSourceMapView::new(&source_map) };
1384+
1385+
let mut hasher = StableHasher::new();
1386+
1387+
let expn_data = expn_id.expn_data();
1388+
// This disambiguator should not have been set yet.
1389+
assert_eq!(
1390+
expn_data.disambiguator, 0,
1391+
"Already set disambiguator for ExpnData: {:?}",
1392+
expn_data
1393+
);
1394+
expn_data.hash_stable(&mut ctx, &mut hasher);
1395+
let first_hash = hasher.finish();
1396+
1397+
let modified = HygieneData::with(|data| {
1398+
// If this is the first ExpnData with a given hash, then keep our
1399+
// disambiguator at 0 (the default u32 value)
1400+
let disambig = data.expn_data_disambiguators.entry(first_hash).or_default();
1401+
data.expn_data[expn_id.0 as usize].as_mut().unwrap().disambiguator = *disambig;
1402+
*disambig += 1;
1403+
1404+
*disambig != 1
1405+
});
1406+
1407+
if modified {
1408+
info!("Set disambiguator for {:?} (hash {:?})", expn_id, first_hash);
1409+
info!("expn_data = {:?}", expn_id.expn_data());
1410+
1411+
// Verify that the new disambiguator makes the hash unique
1412+
#[cfg(debug_assertions)]
1413+
{
1414+
hasher = StableHasher::new();
1415+
expn_id.expn_data().hash_stable(&mut ctx, &mut hasher);
1416+
let new_hash: Fingerprint = hasher.finish();
1417+
1418+
HygieneData::with(|data| {
1419+
data.expn_data_disambiguators
1420+
.get(&new_hash)
1421+
.expect_none("Hash collision after disambiguator update!");
1422+
});
1423+
};
1424+
}
1425+
}

compiler/rustc_span/src/lib.rs

+13-9
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ use std::hash::Hash;
6565
use std::ops::{Add, Range, Sub};
6666
use std::path::{Path, PathBuf};
6767
use std::str::FromStr;
68+
use std::thread::LocalKey;
6869

6970
use md5::Md5;
7071
use sha1::Digest;
@@ -1865,6 +1866,11 @@ fn lookup_line(lines: &[BytePos], pos: BytePos) -> isize {
18651866
/// instead of implementing everything in rustc_middle.
18661867
pub trait HashStableContext {
18671868
fn hash_def_id(&mut self, _: DefId, hasher: &mut StableHasher);
1869+
/// Obtains a cache for storing the `Fingerprint` of an `ExpnId`.
1870+
/// This method allows us to have multiple `HashStableContext` implementations
1871+
/// that hash things in a different way, without the results of one polluting
1872+
/// the cache of the other.
1873+
fn expn_id_cache() -> &'static LocalKey<ExpnIdCache>;
18681874
fn hash_crate_num(&mut self, _: CrateNum, hasher: &mut StableHasher);
18691875
fn hash_spans(&self) -> bool;
18701876
fn byte_pos_to_line_and_col(
@@ -1961,15 +1967,10 @@ impl<CTX: HashStableContext> HashStable<CTX> for SyntaxContext {
19611967
}
19621968
}
19631969

1970+
pub type ExpnIdCache = RefCell<Vec<Option<Fingerprint>>>;
1971+
19641972
impl<CTX: HashStableContext> HashStable<CTX> for ExpnId {
19651973
fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
1966-
// Since the same expansion context is usually referenced many
1967-
// times, we cache a stable hash of it and hash that instead of
1968-
// recursing every time.
1969-
thread_local! {
1970-
static CACHE: RefCell<Vec<Option<Fingerprint>>> = Default::default();
1971-
}
1972-
19731974
const TAG_ROOT: u8 = 0;
19741975
const TAG_NOT_ROOT: u8 = 1;
19751976

@@ -1978,8 +1979,11 @@ impl<CTX: HashStableContext> HashStable<CTX> for ExpnId {
19781979
return;
19791980
}
19801981

1982+
// Since the same expansion context is usually referenced many
1983+
// times, we cache a stable hash of it and hash that instead of
1984+
// recursing every time.
19811985
let index = self.as_u32() as usize;
1982-
let res = CACHE.with(|cache| cache.borrow().get(index).copied().flatten());
1986+
let res = CTX::expn_id_cache().with(|cache| cache.borrow().get(index).copied().flatten());
19831987

19841988
if let Some(res) = res {
19851989
res.hash_stable(ctx, hasher);
@@ -1991,7 +1995,7 @@ impl<CTX: HashStableContext> HashStable<CTX> for ExpnId {
19911995
self.expn_data().hash_stable(ctx, &mut sub_hasher);
19921996
let sub_hash: Fingerprint = sub_hasher.finish();
19931997

1994-
CACHE.with(|cache| {
1998+
CTX::expn_id_cache().with(|cache| {
19951999
let mut cache = cache.borrow_mut();
19962000
if cache.len() < new_len {
19972001
cache.resize(new_len, None);

0 commit comments

Comments
 (0)