Skip to content

Commit 0883996

Browse files
authored
Auto merge of #37427 - nnethercote:opt-IchHasher, r=michaelwoerister
Reduce the number of bytes hashed by IchHasher.

IchHasher uses blake2b hashing, which is expensive, so the fewer bytes hashed the better. There are two big ways to reduce the number of bytes hashed.

- Filenames in spans account for ~66% of all bytes (for builds with debuginfo). The vast majority of spans have the same filename for the start of the span and the end of the span, so hashing the filename just once in those cases is a big win.
- u32 and u64 and usize values account for ~25%--33% of all bytes (for builds with debuginfo). The vast majority of these are small, i.e. fit in a u8, so shrinking them down before hashing is also a big win.

This PR implements these two optimizations. I'm certain the first one is safe. I'm about 90% sure that the second one is safe.

Here are measurements of the number of bytes hashed when doing debuginfo-enabled builds of stdlib and rustc-benchmarks/syntex-0.42.2-incr-clean.

```
                     stdlib   syntex-incr
                     ------   -----------
original        156,781,386   255,095,596
half-SawSpan    106,744,403   176,345,419
short-ints       45,890,534   118,014,227
no-SawSpan[*]     6,831,874    45,875,714

[*] don't hash the SawSpan at all. Not part of this PR, just implemented for comparison's sake.
```

For debug builds of syntex-0.42.2-incr-clean, the two changes give a 1--2% speed-up.
2 parents e96b9d2 + d73c68c commit 0883996

File tree

2 files changed

+68
-12
lines changed

2 files changed

+68
-12
lines changed

src/librustc_incremental/calculate_svh/hasher.rs

+37-1
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,16 @@
99
// except according to those terms.
1010

1111
use std::mem;
12+
use std::hash::Hasher;
1213
use rustc_data_structures::blake2b::Blake2bHasher;
1314
use rustc::ty::util::ArchIndependentHasher;
1415
use ich::Fingerprint;
16+
use rustc_serialize::leb128::write_unsigned_leb128;
1517

1618
#[derive(Debug)]
1719
pub struct IchHasher {
1820
state: ArchIndependentHasher<Blake2bHasher>,
21+
leb128_helper: Vec<u8>,
1922
bytes_hashed: u64,
2023
}
2124

@@ -24,6 +27,7 @@ impl IchHasher {
2427
let hash_size = mem::size_of::<Fingerprint>();
2528
IchHasher {
2629
state: ArchIndependentHasher::new(Blake2bHasher::new(hash_size, &[])),
30+
leb128_helper: vec![],
2731
bytes_hashed: 0
2832
}
2933
}
@@ -37,9 +41,19 @@ impl IchHasher {
3741
fingerprint.0.copy_from_slice(self.state.into_inner().finalize());
3842
fingerprint
3943
}
44+
45+
#[inline]
46+
fn write_uleb128(&mut self, value: u64) {
47+
let len = write_unsigned_leb128(&mut self.leb128_helper, 0, value);
48+
self.state.write(&self.leb128_helper[0..len]);
49+
self.bytes_hashed += len as u64;
50+
}
4051
}
4152

42-
impl ::std::hash::Hasher for IchHasher {
53+
// For the non-u8 integer cases we leb128 encode them first. Because small
54+
// integers dominate, this significantly and cheaply reduces the number of
55+
// bytes hashed, which is good because blake2b is expensive.
56+
impl Hasher for IchHasher {
4357
fn finish(&self) -> u64 {
4458
bug!("Use other finish() implementation to get the full 128-bit hash.");
4559
}
@@ -49,4 +63,26 @@ impl ::std::hash::Hasher for IchHasher {
4963
self.state.write(bytes);
5064
self.bytes_hashed += bytes.len() as u64;
5165
}
66+
67+
// There is no need to leb128-encode u8 values.
68+
69+
#[inline]
70+
fn write_u16(&mut self, i: u16) {
71+
self.write_uleb128(i as u64);
72+
}
73+
74+
#[inline]
75+
fn write_u32(&mut self, i: u32) {
76+
self.write_uleb128(i as u64);
77+
}
78+
79+
#[inline]
80+
fn write_u64(&mut self, i: u64) {
81+
self.write_uleb128(i);
82+
}
83+
84+
#[inline]
85+
fn write_usize(&mut self, i: usize) {
86+
self.write_uleb128(i as u64);
87+
}
5288
}

src/librustc_incremental/calculate_svh/svh_visitor.rs

+31-11
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,8 @@ impl<'a, 'hash, 'tcx> StrictVersionHashVisitor<'a, 'hash, 'tcx> {
8888
// within the CodeMap.
8989
// Also note that we are hashing byte offsets for the column, not unicode
9090
// codepoint offsets. For the purpose of the hash that's sufficient.
91+
// Also, hashing filenames is expensive so we avoid doing it twice when the
92+
// span starts and ends in the same file, which is almost always the case.
9193
fn hash_span(&mut self, span: Span) {
9294
debug!("hash_span: st={:?}", self.st);
9395

@@ -103,21 +105,35 @@ impl<'a, 'hash, 'tcx> StrictVersionHashVisitor<'a, 'hash, 'tcx> {
103105
span.hi
104106
};
105107

106-
let loc1 = self.codemap.byte_pos_to_line_and_col(span.lo);
107-
let loc2 = self.codemap.byte_pos_to_line_and_col(span_hi);
108-
109-
let expansion_kind = match span.expn_id {
108+
let expn_kind = match span.expn_id {
110109
NO_EXPANSION => SawSpanExpnKind::NoExpansion,
111110
COMMAND_LINE_EXPN => SawSpanExpnKind::CommandLine,
112111
_ => SawSpanExpnKind::SomeExpansion,
113112
};
114113

115-
SawSpan(loc1.as_ref().map(|&(ref fm, line, col)| (&fm.name[..], line, col)),
116-
loc2.as_ref().map(|&(ref fm, line, col)| (&fm.name[..], line, col)),
117-
expansion_kind)
118-
.hash(self.st);
114+
let loc1 = self.codemap.byte_pos_to_line_and_col(span.lo);
115+
let loc1 = loc1.as_ref()
116+
.map(|&(ref fm, line, col)| (&fm.name[..], line, col))
117+
.unwrap_or(("???", 0, BytePos(0)));
118+
119+
let loc2 = self.codemap.byte_pos_to_line_and_col(span_hi);
120+
let loc2 = loc2.as_ref()
121+
.map(|&(ref fm, line, col)| (&fm.name[..], line, col))
122+
.unwrap_or(("???", 0, BytePos(0)));
123+
124+
let saw = if loc1.0 == loc2.0 {
125+
SawSpan(loc1.0,
126+
loc1.1, loc1.2,
127+
loc2.1, loc2.2,
128+
expn_kind)
129+
} else {
130+
SawSpanTwoFiles(loc1.0, loc1.1, loc1.2,
131+
loc2.0, loc2.1, loc2.2,
132+
expn_kind)
133+
};
134+
saw.hash(self.st);
119135

120-
if expansion_kind == SawSpanExpnKind::SomeExpansion {
136+
if expn_kind == SawSpanExpnKind::SomeExpansion {
121137
let call_site = self.codemap.codemap().source_callsite(span);
122138
self.hash_span(call_site);
123139
}
@@ -189,9 +205,13 @@ enum SawAbiComponent<'a> {
189205
SawAssocTypeBinding,
190206
SawAttribute(ast::AttrStyle),
191207
SawMacroDef,
192-
SawSpan(Option<(&'a str, usize, BytePos)>,
193-
Option<(&'a str, usize, BytePos)>,
208+
SawSpan(&'a str,
209+
usize, BytePos,
210+
usize, BytePos,
194211
SawSpanExpnKind),
212+
SawSpanTwoFiles(&'a str, usize, BytePos,
213+
&'a str, usize, BytePos,
214+
SawSpanExpnKind),
195215
}
196216

197217
/// SawExprComponent carries all of the information that we want

0 commit comments

Comments
 (0)