
Commit fc32303

Committed Feb 12, 2022

Auto merge of rust-lang#93671 - Kobzol:stable-hash-const, r=the8472
Use const generics in SipHasher128's short_write

This was proposed by `@michaelwoerister` [here](rust-lang#93615 (comment)). A few comments:

1) I tried to pass `&[u8; LEN]` instead of `[u8; LEN]`. Locally, it resulted in small icount regressions (about 0.5 %). When passing by value, there were no regressions (and no improvements).

2) I wonder whether we should use `to_ne_bytes()` in `SipHasher128` to keep it generic and only use `to_le_bytes()` in `StableHasher`. However, `SipHasher128` is currently only used by `StableHasher`, and the `short_write` method was private, so I couldn't call it directly from `StableHasher`. Using `to_le()` in `StableHasher` was already breaking this abstraction boundary slightly.

```rust
debug_assert!(LEN <= 8);
```

This could be done at compile time, but actually I think that now we can remove this assert altogether.

r? `@the8472`
2 parents: f198510 + 5fc2e56 (commit fc32303)
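
To illustrate the shape this change gives `short_write`, here is a minimal, self-contained sketch (not the rustc code; `TinyWriter` and its fields are invented for this example): the length of the write becomes a const generic parameter, and each caller chooses the endianness when it converts the integer into a byte array.

```rust
// Minimal sketch of a const-generic short_write. The real SipHasher128
// buffers into word-sized elements and runs SipHash rounds on full blocks;
// all of that is omitted here.
struct TinyWriter {
    buf: [u8; 16],
    nbuf: usize,
}

impl TinyWriter {
    fn new() -> Self {
        TinyWriter { buf: [0; 16], nbuf: 0 }
    }

    // LEN is known at compile time at every call site, so the copy below has
    // a constant length and can be fully optimized.
    fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
        debug_assert!(LEN <= 8);
        assert!(self.nbuf + LEN <= self.buf.len());
        self.buf[self.nbuf..self.nbuf + LEN].copy_from_slice(&bytes);
        self.nbuf += LEN;
    }
}

fn main() {
    let mut w = TinyWriter::new();
    // The caller decides on endianness when producing the byte array:
    w.short_write(0x1234u16.to_le_bytes()); // platform-independent, StableHasher style
    w.short_write(0xABu8.to_ne_bytes());    // native-endian, SipHasher128 style
    assert_eq!(w.nbuf, 3);
    assert_eq!(&w.buf[..3], &[0x34, 0x12, 0xAB]);
}
```

Passing the array by value rather than as `&[u8; LEN]` mirrors the variant that benchmarked best (or at least no worse) in the commit message above.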

File tree: 2 files changed (+39 / -46 lines)


compiler/rustc_data_structures/src/sip128.rs (+29 / -32)

@@ -202,28 +202,26 @@ impl SipHasher128 {
         hasher
     }

-    // A specialized write function for values with size <= 8.
     #[inline]
-    fn short_write<T>(&mut self, x: T) {
-        let size = mem::size_of::<T>();
+    pub fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
         let nbuf = self.nbuf;
-        debug_assert!(size <= 8);
+        debug_assert!(LEN <= 8);
         debug_assert!(nbuf < BUFFER_SIZE);
-        debug_assert!(nbuf + size < BUFFER_WITH_SPILL_SIZE);
+        debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);

-        if nbuf + size < BUFFER_SIZE {
+        if nbuf + LEN < BUFFER_SIZE {
             unsafe {
                 // The memcpy call is optimized away because the size is known.
                 let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
-                ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);
+                ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
             }

-            self.nbuf = nbuf + size;
+            self.nbuf = nbuf + LEN;

             return;
         }

-        unsafe { self.short_write_process_buffer(x) }
+        unsafe { self.short_write_process_buffer(bytes) }
     }

     // A specialized write function for values with size <= 8 that should only
@@ -233,18 +231,17 @@ impl SipHasher128 {
     // `self.nbuf` must cause `self.buf` to become fully initialized (and not
     // overflow) if it wasn't already.
     #[inline(never)]
-    unsafe fn short_write_process_buffer<T>(&mut self, x: T) {
-        let size = mem::size_of::<T>();
+    unsafe fn short_write_process_buffer<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
         let nbuf = self.nbuf;
-        debug_assert!(size <= 8);
+        debug_assert!(LEN <= 8);
         debug_assert!(nbuf < BUFFER_SIZE);
-        debug_assert!(nbuf + size >= BUFFER_SIZE);
-        debug_assert!(nbuf + size < BUFFER_WITH_SPILL_SIZE);
+        debug_assert!(nbuf + LEN >= BUFFER_SIZE);
+        debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);

         // Copy first part of input into end of buffer, possibly into spill
         // element. The memcpy call is optimized away because the size is known.
         let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
-        ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);
+        ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);

         // Process buffer.
         for i in 0..BUFFER_CAPACITY {
@@ -254,17 +251,17 @@ impl SipHasher128 {
             self.state.v0 ^= elem;
         }

-        // Copy remaining input into start of buffer by copying size - 1
-        // elements from spill (at most size - 1 bytes could have overflowed
+        // Copy remaining input into start of buffer by copying LEN - 1
+        // elements from spill (at most LEN - 1 bytes could have overflowed
         // into the spill). The memcpy call is optimized away because the size
-        // is known. And the whole copy is optimized away for size == 1.
+        // is known. And the whole copy is optimized away for LEN == 1.
         let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8;
-        ptr::copy_nonoverlapping(src, self.buf.as_mut_ptr() as *mut u8, size - 1);
+        ptr::copy_nonoverlapping(src, self.buf.as_mut_ptr() as *mut u8, LEN - 1);

         // This function should only be called when the write fills the buffer.
-        // Therefore, when size == 1, the new `self.nbuf` must be zero. The size
-        // is statically known, so the branch is optimized away.
-        self.nbuf = if size == 1 { 0 } else { nbuf + size - BUFFER_SIZE };
+        // Therefore, when LEN == 1, the new `self.nbuf` must be zero.
+        // LEN is statically known, so the branch is optimized away.
+        self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE };
         self.processed += BUFFER_SIZE;
     }

@@ -412,52 +409,52 @@ impl SipHasher128 {
 impl Hasher for SipHasher128 {
     #[inline]
     fn write_u8(&mut self, i: u8) {
-        self.short_write(i);
+        self.short_write(i.to_ne_bytes());
     }

     #[inline]
     fn write_u16(&mut self, i: u16) {
-        self.short_write(i);
+        self.short_write(i.to_ne_bytes());
     }

     #[inline]
     fn write_u32(&mut self, i: u32) {
-        self.short_write(i);
+        self.short_write(i.to_ne_bytes());
     }

     #[inline]
     fn write_u64(&mut self, i: u64) {
-        self.short_write(i);
+        self.short_write(i.to_ne_bytes());
     }

     #[inline]
     fn write_usize(&mut self, i: usize) {
-        self.short_write(i);
+        self.short_write(i.to_ne_bytes());
     }

     #[inline]
     fn write_i8(&mut self, i: i8) {
-        self.short_write(i as u8);
+        self.short_write((i as u8).to_ne_bytes());
     }

     #[inline]
     fn write_i16(&mut self, i: i16) {
-        self.short_write(i as u16);
+        self.short_write((i as u16).to_ne_bytes());
     }

     #[inline]
     fn write_i32(&mut self, i: i32) {
-        self.short_write(i as u32);
+        self.short_write((i as u32).to_ne_bytes());
     }

     #[inline]
     fn write_i64(&mut self, i: i64) {
-        self.short_write(i as u64);
+        self.short_write((i as u64).to_ne_bytes());
     }

     #[inline]
     fn write_isize(&mut self, i: isize) {
-        self.short_write(i as usize);
+        self.short_write((i as usize).to_ne_bytes());
     }

     #[inline]
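
The commit message notes that the `debug_assert!(LEN <= 8)` kept in the hunks above could instead be enforced at compile time. One hedged way to express that is an inline `const` block, which is only available in much newer Rust than this commit targeted; the `Buffered` type below is hypothetical, not part of rustc:

```rust
// Hypothetical variant: reject LEN > 8 while compiling instead of with a
// runtime debug_assert!. The const block is evaluated per monomorphization,
// so a call that instantiates LEN = 9 fails to compile rather than panicking.
pub struct Buffered {
    nbuf: usize,
}

impl Buffered {
    pub fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
        const { assert!(LEN <= 8) };
        // ... buffer `bytes` as the real implementation would ...
        self.nbuf += bytes.len();
    }
}

fn main() {
    let mut b = Buffered { nbuf: 0 };
    b.short_write(7u32.to_le_bytes()); // LEN = 4, compiles and runs fine
    assert_eq!(b.nbuf, 4);
    // b.short_write([0u8; 9]);        // LEN = 9 would be rejected at compile time
}
```

Since every caller goes through `to_ne_bytes()`/`to_le_bytes()` on integers of at most 8 bytes, the commit message suggests the assert could also simply be dropped.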

compiler/rustc_data_structures/src/stable_hasher.rs (+10 / -14)

@@ -80,30 +80,30 @@ impl Hasher for StableHasher {

     #[inline]
     fn write_u16(&mut self, i: u16) {
-        self.state.write_u16(i.to_le());
+        self.state.short_write(i.to_le_bytes());
     }

     #[inline]
     fn write_u32(&mut self, i: u32) {
-        self.state.write_u32(i.to_le());
+        self.state.short_write(i.to_le_bytes());
     }

     #[inline]
     fn write_u64(&mut self, i: u64) {
-        self.state.write_u64(i.to_le());
+        self.state.short_write(i.to_le_bytes());
     }

     #[inline]
     fn write_u128(&mut self, i: u128) {
-        self.state.write_u128(i.to_le());
+        self.state.write(&i.to_le_bytes());
     }

     #[inline]
     fn write_usize(&mut self, i: usize) {
         // Always treat usize as u64 so we get the same results on 32 and 64 bit
         // platforms. This is important for symbol hashes when cross compiling,
         // for example.
-        self.state.write_u64((i as u64).to_le());
+        self.state.short_write((i as u64).to_le_bytes());
     }

     #[inline]
@@ -113,22 +113,22 @@ impl Hasher for StableHasher {

     #[inline]
     fn write_i16(&mut self, i: i16) {
-        self.state.write_i16(i.to_le());
+        self.state.short_write((i as u16).to_le_bytes());
     }

     #[inline]
     fn write_i32(&mut self, i: i32) {
-        self.state.write_i32(i.to_le());
+        self.state.short_write((i as u32).to_le_bytes());
     }

     #[inline]
     fn write_i64(&mut self, i: i64) {
-        self.state.write_i64(i.to_le());
+        self.state.short_write((i as u64).to_le_bytes());
     }

     #[inline]
     fn write_i128(&mut self, i: i128) {
-        self.state.write_i128(i.to_le());
+        self.state.write(&(i as u128).to_le_bytes());
     }

     #[inline]
@@ -144,7 +144,7 @@ impl Hasher for StableHasher {
         #[inline(never)]
         fn hash_value(state: &mut SipHasher128, value: u64) {
            state.write_u8(0xFF);
-            state.write_u64(value.to_le());
+            state.short_write(value.to_le_bytes());
        }

        // `isize` values often seem to have a small (positive) numeric value in practice.
@@ -161,10 +161,6 @@ impl Hasher for StableHasher {
        // 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two
        // `isize`s that fit within a different amount of bytes, they should always produce a different
        // byte stream for the hasher.
-        //
-        // To ensure that this optimization hashes the exact same bytes on both little-endian and
-        // big-endian architectures, we compare the value with 0xFF before we convert the number
-        // into a unified representation (little-endian).
        if value < 0xFF {
            self.state.write_u8(value as u8);
        } else {