Skip to content

Commit 847f4fc

Browse files
committed
Auto merge of #45522 - michaelwoerister:fix-stable-hasher-cross, r=arielb1
Fix 32- vs 64-bit platform instability in StableHasher. This might already be enough to fix issue #45500. r? @alexcrichton
2 parents b218a02 + 54818b3 commit 847f4fc

File tree

1 file changed

+16
-24
lines changed

1 file changed

+16
-24
lines changed

src/librustc_data_structures/stable_hasher.rs

+16-24
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,13 @@ use std::marker::PhantomData;
1313
use std::mem;
1414
use sip128::SipHasher128;
1515

16-
/// When hashing something that ends up affecting properties like symbol names. We
17-
/// want these symbol names to be calculated independent of other factors like
18-
/// what architecture you're compiling *from*.
16+
/// When hashing something that ends up affecting properties like symbol names,
17+
/// we want these symbol names to be calculated independently of other factors
18+
/// like what architecture you're compiling *from*.
1919
///
20-
/// The hashing just uses the standard `Hash` trait, but the implementations of
21-
/// `Hash` for the `usize` and `isize` types are *not* architecture independent
22-
/// (e.g. they has 4 or 8 bytes). As a result we want to avoid `usize` and
23-
/// `isize` completely when hashing.
24-
///
25-
/// To do that, we encode all integers to be hashed with some
26-
/// arch-independent encoding.
27-
///
28-
/// At the moment, we pass i8/u8 straight through and encode
29-
/// all other integers using leb128.
30-
///
31-
/// This hasher currently always uses the stable Blake2b algorithm
32-
/// and allows for variable output lengths through its type
33-
/// parameter.
20+
/// To that end we always convert integers to little-endian format before
21+
/// hashing, and the architecture-dependent `isize` and `usize` types are
22+
/// extended to 64 bits if needed.
3423
pub struct StableHasher<W> {
3524
state: SipHasher128,
3625
bytes_hashed: u64,
@@ -86,9 +75,6 @@ impl<W> StableHasher<W> {
8675
}
8776
}
8877

89-
// For the non-u8 integer cases we leb128 encode them first. Because small
90-
// integers dominate, this significantly and cheaply reduces the number of
91-
// bytes hashed, which is good because blake2b is expensive.
9278
impl<W> Hasher for StableHasher<W> {
9379
fn finish(&self) -> u64 {
9480
panic!("use StableHasher::finalize instead");
@@ -132,8 +118,11 @@ impl<W> Hasher for StableHasher<W> {
132118

133119
#[inline]
134120
fn write_usize(&mut self, i: usize) {
135-
self.state.write_usize(i.to_le());
136-
self.bytes_hashed += ::std::mem::size_of::<usize>() as u64;
121+
// Always treat usize as u64 so we get the same results on 32- and 64-bit
122+
// platforms. This is important for symbol hashes when cross compiling,
123+
// for example.
124+
self.state.write_u64((i as u64).to_le());
125+
self.bytes_hashed += 8;
137126
}
138127

139128
#[inline]
@@ -168,8 +157,11 @@ impl<W> Hasher for StableHasher<W> {
168157

169158
#[inline]
170159
fn write_isize(&mut self, i: isize) {
171-
self.state.write_isize(i.to_le());
172-
self.bytes_hashed += ::std::mem::size_of::<isize>() as u64;
160+
// Always treat isize as i64 so we get the same results on 32- and 64-bit
161+
// platforms. This is important for symbol hashes when cross compiling,
162+
// for example.
163+
self.state.write_i64((i as i64).to_le());
164+
self.bytes_hashed += 8;
173165
}
174166
}
175167

0 commit comments

Comments
 (0)