Skip to content

Commit 7343291

Browse files
authored Oct 19, 2016
Rollup merge of rust-lang#37233 - michaelwoerister:blake2-for-ich, r=nikomatsakis
ICH: Use 128-bit Blake2b hash instead of 64-bit SipHash for incr. comp. fingerprints This PR makes incr. comp. hashes 128 bits wide in order to push collision probability below a threshold that we need to worry about. It also replaces SipHash, which has been mentioned multiple times as not being built for fingerprinting, with the [BLAKE2b hash function](https://blake2.net/), an improved version of the BLAKE sha-3 finalist. I was worried that using a cryptographic hash function would make ICH computation noticeably slower, but after doing some performance tests, I'm not any more. Most of the time BLAKE2b is actually faster than using two SipHashes (in order to get 128 bits): ``` SipHash libcore: 0.199 seconds libstd: 0.090 seconds BLAKE2b libcore: 0.162 seconds libstd: 0.078 seconds ``` If someone can prove that something like MetroHash128 provides a comparably low collision probability as BLAKE2, I'm happy to switch. But for now we are at least not taking a performance hit. I also suggest that we throw out the sha-256 implementation in the compiler and replace it with BLAKE2, since our sha-256 implementation is two to three times slower than the BLAKE2 implementation in this PR (cc @alexcrichton @eddyb @brson) r? @nikomatsakis (although there's not much incr. comp. specific in here, so feel free to re-assign)
2 parents 6ae80c6 + d07523c commit 7343291

File tree

14 files changed

+460
-38
lines changed

14 files changed

+460
-38
lines changed
 
+286
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
12+
// An implementation of the Blake2b cryptographic hash function.
13+
// The implementation closely follows: https://tools.ietf.org/html/rfc7693
14+
//
15+
// "BLAKE2 is a cryptographic hash function faster than MD5, SHA-1, SHA-2, and
16+
// SHA-3, yet is at least as secure as the latest standard SHA-3."
17+
// according to their own website :)
18+
//
19+
// Indeed this implementation is two to three times as fast as our SHA-256
20+
// implementation. If you have the luxury of being able to use crates from
21+
// crates.io, you can go there and find still faster implementations.
22+
23+
// Internal state of an in-progress BLAKE2b hash computation.
pub struct Blake2bCtx {
    b: [u8; 128],  // input buffer: bytes accumulate here until a full 128-byte block is ready
    h: [u64; 8],   // chained state (the hash value so far)
    t: [u64; 2],   // 128-bit count of bytes fed in so far (t[0] = low word, t[1] = high word)
    c: usize,      // number of bytes currently pending in `b`
    outlen: usize, // digest size in bytes (1..=64), fixed at creation time
}
30+
31+
// Debug output shows only the chained state `h`; the pending input buffer
// and byte counters are omitted.
impl ::std::fmt::Debug for Blake2bCtx {
    fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> Result<(), ::std::fmt::Error> {
        write!(fmt, "{:?}", self.h)
    }
}
36+
37+
// The BLAKE2b mixing function G (RFC 7693, section 3.1): mixes the two
// message words `x` and `y` into the four working-vector slots `a`, `b`,
// `c`, `d`. Both halves apply the same add/xor/rotate ladder; only the
// injected word and the two rotation amounts differ.
#[inline(always)]
fn b2b_g(v: &mut [u64; 16],
         a: usize,
         b: usize,
         c: usize,
         d: usize,
         x: u64,
         y: u64)
{
    // (word to inject, first rotation, second rotation) for each half-step.
    for &(input, r1, r2) in &[(x, 32u32, 24u32), (y, 16u32, 63u32)] {
        v[a] = v[a].wrapping_add(v[b]).wrapping_add(input);
        v[d] = (v[d] ^ v[a]).rotate_right(r1);
        v[c] = v[c].wrapping_add(v[d]);
        v[b] = (v[b] ^ v[c]).rotate_right(r2);
    }
}
55+
56+
// Initialization vector (RFC 7693, section 2.6): the first 64 bits of the
// fractional parts of the square roots of the first eight primes — the same
// constants SHA-512 uses.
const BLAKE2B_IV: [u64; 8] = [
    0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
    0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
    0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
    0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
];
63+
64+
fn blake2b_compress(ctx: &mut Blake2bCtx, last: bool) {
65+
66+
const SIGMA: [[usize; 16]; 12] = [
67+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ],
68+
[14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 ],
69+
[11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 ],
70+
[7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 ],
71+
[9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 ],
72+
[2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 ],
73+
[12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 ],
74+
[13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 ],
75+
[6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 ],
76+
[10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 ],
77+
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ],
78+
[14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 ]
79+
];
80+
81+
let mut v: [u64; 16] = [
82+
ctx.h[0],
83+
ctx.h[1],
84+
ctx.h[2],
85+
ctx.h[3],
86+
ctx.h[4],
87+
ctx.h[5],
88+
ctx.h[6],
89+
ctx.h[7],
90+
91+
BLAKE2B_IV[0],
92+
BLAKE2B_IV[1],
93+
BLAKE2B_IV[2],
94+
BLAKE2B_IV[3],
95+
BLAKE2B_IV[4],
96+
BLAKE2B_IV[5],
97+
BLAKE2B_IV[6],
98+
BLAKE2B_IV[7],
99+
];
100+
101+
v[12] ^= ctx.t[0]; // low 64 bits of offset
102+
v[13] ^= ctx.t[1]; // high 64 bits
103+
if last {
104+
v[14] = !v[14];
105+
}
106+
107+
{
108+
// Re-interpret the input buffer in the state as u64s
109+
let m: &mut [u64; 16] = unsafe {
110+
let b: &mut [u8; 128] = &mut ctx.b;
111+
::std::mem::transmute(b)
112+
};
113+
114+
// It's OK to modify the buffer in place since this is the last time
115+
// this data will be accessed before it's overwritten
116+
if cfg!(target_endian = "big") {
117+
for word in &mut m[..] {
118+
*word = word.to_be();
119+
}
120+
}
121+
122+
for i in 0 .. 12 {
123+
b2b_g(&mut v, 0, 4, 8, 12, m[SIGMA[i][ 0]], m[SIGMA[i][ 1]]);
124+
b2b_g(&mut v, 1, 5, 9, 13, m[SIGMA[i][ 2]], m[SIGMA[i][ 3]]);
125+
b2b_g(&mut v, 2, 6, 10, 14, m[SIGMA[i][ 4]], m[SIGMA[i][ 5]]);
126+
b2b_g(&mut v, 3, 7, 11, 15, m[SIGMA[i][ 6]], m[SIGMA[i][ 7]]);
127+
b2b_g(&mut v, 0, 5, 10, 15, m[SIGMA[i][ 8]], m[SIGMA[i][ 9]]);
128+
b2b_g(&mut v, 1, 6, 11, 12, m[SIGMA[i][10]], m[SIGMA[i][11]]);
129+
b2b_g(&mut v, 2, 7, 8, 13, m[SIGMA[i][12]], m[SIGMA[i][13]]);
130+
b2b_g(&mut v, 3, 4, 9, 14, m[SIGMA[i][14]], m[SIGMA[i][15]]);
131+
}
132+
}
133+
134+
for i in 0 .. 8 {
135+
ctx.h[i] ^= v[i] ^ v[i + 8];
136+
}
137+
}
138+
139+
pub fn blake2b_new(outlen: usize, key: &[u8]) -> Blake2bCtx {
140+
assert!(outlen > 0 && outlen <= 64 && key.len() <= 64);
141+
142+
let mut ctx = Blake2bCtx {
143+
b: [0; 128],
144+
h: BLAKE2B_IV,
145+
t: [0; 2],
146+
c: 0,
147+
outlen: outlen,
148+
};
149+
150+
ctx.h[0] ^= 0x01010000 ^ ((key.len() << 8) as u64) ^ (outlen as u64);
151+
152+
if key.len() > 0 {
153+
blake2b_update(&mut ctx, key);
154+
ctx.c = ctx.b.len();
155+
}
156+
157+
ctx
158+
}
159+
160+
pub fn blake2b_update(ctx: &mut Blake2bCtx, mut data: &[u8])
161+
{
162+
let mut bytes_to_copy = data.len();
163+
let mut space_in_buffer = ctx.b.len() - ctx.c;
164+
165+
while bytes_to_copy > space_in_buffer {
166+
checked_mem_copy(data, &mut ctx.b[ctx.c .. ], space_in_buffer);
167+
168+
ctx.t[0] = ctx.t[0].wrapping_add(ctx.b.len() as u64);
169+
if ctx.t[0] < (ctx.b.len() as u64) {
170+
ctx.t[1] += 1;
171+
}
172+
blake2b_compress(ctx, false);
173+
ctx.c = 0;
174+
175+
data = &data[space_in_buffer .. ];
176+
bytes_to_copy -= space_in_buffer;
177+
space_in_buffer = ctx.b.len();
178+
}
179+
180+
if bytes_to_copy > 0 {
181+
checked_mem_copy(data, &mut ctx.b[ctx.c .. ], bytes_to_copy);
182+
ctx.c += bytes_to_copy;
183+
}
184+
}
185+
186+
pub fn blake2b_final(mut ctx: Blake2bCtx, out: &mut [u8])
187+
{
188+
ctx.t[0] = ctx.t[0].wrapping_add(ctx.c as u64);
189+
if ctx.t[0] < ctx.c as u64 {
190+
ctx.t[1] += 1;
191+
}
192+
193+
while ctx.c < 128 {
194+
ctx.b[ctx.c] = 0;
195+
ctx.c += 1;
196+
}
197+
198+
blake2b_compress(&mut ctx, true);
199+
200+
if cfg!(target_endian = "big") {
201+
// Make sure that the data is in memory in little endian format, as is
202+
// demanded by BLAKE2
203+
for word in &mut ctx.h {
204+
*word = word.to_le();
205+
}
206+
}
207+
208+
checked_mem_copy(&ctx.h, out, ctx.outlen);
209+
}
210+
211+
// Copy exactly `byte_count` bytes from the start of `from` to the start of
// `to`, viewing both slices as raw bytes regardless of element type.
// Panics if either slice spans fewer than `byte_count` bytes.
#[inline(always)]
fn checked_mem_copy<T1, T2>(from: &[T1], to: &mut [T2], byte_count: usize) {
    assert!(byte_count <= from.len() * ::std::mem::size_of::<T1>());
    assert!(byte_count <= to.len() * ::std::mem::size_of::<T2>());
    unsafe {
        // SAFETY: the asserts above prove both regions span at least
        // `byte_count` bytes, and the shared/mutable borrows guarantee the
        // two slices belong to distinct, non-overlapping objects.
        ::std::ptr::copy_nonoverlapping(from.as_ptr() as *const u8,
                                        to.as_mut_ptr() as *mut u8,
                                        byte_count);
    }
}
223+
224+
pub fn blake2b(out: &mut [u8], key: &[u8], data: &[u8])
225+
{
226+
let mut ctx = blake2b_new(out.len(), key);
227+
blake2b_update(&mut ctx, data);
228+
blake2b_final(ctx, out);
229+
}
230+
231+
// Deterministic pseudo-random byte generator from RFC 7693, Appendix E:
// a Fibonacci-style recurrence seeded with 0xDEAD4BAD * seed, emitting
// the top byte of each term. Used to build reproducible self-test inputs.
#[cfg(test)]
fn selftest_seq(out: &mut [u8], seed: u32)
{
    let mut prev: u32 = 0xDEAD4BADu32.wrapping_mul(seed);
    let mut curr: u32 = 1;

    for byte in out.iter_mut() {
        let next = prev.wrapping_add(curr);
        prev = curr;
        curr = next;
        *byte = (next >> 24) as u8;
    }
}
244+
245+
// Known-answer self-test from RFC 7693, Appendix E: hash a matrix of
// (digest length x input length) test vectors, both unkeyed and keyed,
// feed every digest into one running "hash of hashes", and compare that
// against the expected 32-byte grand result.
#[test]
fn blake2b_selftest()
{
    // grand hash of hash results
    const BLAKE2B_RES: [u8; 32] = [
        0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD,
        0x10, 0xF5, 0x06, 0xC6, 0x1E, 0x29, 0xDA, 0x56,
        0x03, 0xD7, 0x63, 0xB8, 0xBB, 0xAD, 0x2E, 0x73,
        0x7F, 0x5E, 0x76, 0x5A, 0x7B, 0xCC, 0xD4, 0x75
    ];

    // parameter sets: digest sizes and input lengths chosen to cross the
    // 128-byte block boundary (128, 129) and exercise multi-block input.
    const B2B_MD_LEN: [usize; 4] = [20, 32, 48, 64];
    const B2B_IN_LEN: [usize; 6] = [0, 3, 128, 129, 255, 1024];

    let mut data = [0u8; 1024];
    let mut md = [0u8; 64];
    let mut key = [0u8; 64];

    // Running 256-bit hash that accumulates every digest produced below.
    let mut ctx = blake2b_new(32, &[]);

    for i in 0 .. 4 {
        let outlen = B2B_MD_LEN[i];
        for j in 0 .. 6 {
            let inlen = B2B_IN_LEN[j];

            selftest_seq(&mut data[.. inlen], inlen as u32); // unkeyed hash
            blake2b(&mut md[.. outlen], &[], &data[.. inlen]);
            blake2b_update(&mut ctx, &md[.. outlen]); // hash the hash

            selftest_seq(&mut key[0 .. outlen], outlen as u32); // keyed hash
            blake2b(&mut md[.. outlen], &key[.. outlen], &data[.. inlen]);
            blake2b_update(&mut ctx, &md[.. outlen]); // hash the hash
        }
    }

    // compute and compare the hash of hashes
    blake2b_final(ctx, &mut md[..]);
    for i in 0 .. 32 {
        assert_eq!(md[i], BLAKE2B_RES[i]);
    }
}

‎src/librustc_data_structures/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ extern crate serialize as rustc_serialize; // used by deriving
4242
extern crate libc;
4343

4444
pub mod bitslice;
45+
pub mod blake2b;
4546
pub mod bitvec;
4647
pub mod graph;
4748
pub mod ivar;

‎src/librustc_incremental/calculate_svh/hasher.rs

+34-8
Original file line numberDiff line numberDiff line change
@@ -9,38 +9,64 @@
99
// except according to those terms.
1010

1111
use std::hash::Hasher;
12-
use std::collections::hash_map::DefaultHasher;
12+
use std::mem;
13+
use rustc_data_structures::blake2b;
14+
use ich::Fingerprint;
1315

1416
// Hasher used to compute incremental-compilation (ICH) fingerprints.
// Backed by a 128-bit BLAKE2b digest so collision probability is
// negligible, unlike the 64-bit DefaultHasher it replaced.
#[derive(Debug)]
pub struct IchHasher {
    state: blake2b::Blake2bCtx,
    // Total number of bytes fed in; exposed for statistics only.
    bytes_hashed: u64,
}

impl IchHasher {
    pub fn new() -> IchHasher {
        IchHasher {
            // Digest length matches the Fingerprint width (16 bytes).
            state: blake2b::blake2b_new(mem::size_of::<Fingerprint>(), &[]),
            bytes_hashed: 0
        }
    }

    pub fn bytes_hashed(&self) -> u64 {
        self.bytes_hashed
    }

    // Consume the hasher and produce the full 128-bit fingerprint.
    // (std's Hasher::finish() can only return 64 bits, hence this
    // separate, by-value method.)
    pub fn finish(self) -> Fingerprint {
        let mut fingerprint = Fingerprint::zero();
        blake2b::blake2b_final(self.state, &mut fingerprint.0);
        fingerprint
    }
}
3440

3541
impl Hasher for IchHasher {
36-
#[inline]
3742
fn finish(&self) -> u64 {
38-
self.state.finish()
43+
bug!("Use other finish() implementation to get the full 128-bit hash.");
3944
}
4045

4146
#[inline]
4247
fn write(&mut self, bytes: &[u8]) {
43-
self.state.write(bytes);
48+
blake2b::blake2b_update(&mut self.state, bytes);
4449
self.bytes_hashed += bytes.len() as u64;
4550
}
51+
52+
#[inline]
53+
fn write_u16(&mut self, i: u16) {
54+
self.write(&unsafe { mem::transmute::<_, [u8; 2]>(i.to_le()) })
55+
}
56+
57+
#[inline]
58+
fn write_u32(&mut self, i: u32) {
59+
self.write(&unsafe { mem::transmute::<_, [u8; 4]>(i.to_le()) })
60+
}
61+
62+
#[inline]
63+
fn write_u64(&mut self, i: u64) {
64+
self.write(&unsafe { mem::transmute::<_, [u8; 8]>(i.to_le()) })
65+
}
66+
67+
#[inline]
68+
fn write_usize(&mut self, i: usize) {
69+
// always hash as u64, so we don't depend on the size of `usize`
70+
self.write_u64(i as u64);
71+
}
4672
}

0 commit comments

Comments
 (0)
Please sign in to comment.