-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix Hash impl on Rope and RopeSlice.
The previous impl misunderstood the Hasher API, and assumed that as long as the same data was passed in the same order, the hash output would be the same. But in fact, the data must also be split among the `write()` calls in exactly the same way. This new impl fixes that by always passing the data in exact fixed-sized blocks, regardless of the underlying chunk boundaries of the rope. Co-authored-by: Pascal Kuthe <pascal.kuthe@semimod.de>
- Loading branch information
1 parent
69cfb1e
commit fef5be9
Showing
6 changed files
with
316 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
extern crate criterion; | ||
extern crate fnv; | ||
extern crate fxhash; | ||
extern crate ropey; | ||
|
||
use std::collections::hash_map::DefaultHasher; | ||
use std::hash::{Hash, Hasher}; | ||
|
||
use criterion::{black_box, criterion_group, criterion_main, Criterion}; | ||
use fnv::FnvHasher; | ||
use fxhash::FxHasher; | ||
use ropey::Rope; | ||
|
||
const TEXT: &str = include_str!("large.txt"); | ||
const TEXT_SMALL: &str = include_str!("small.txt"); | ||
const TEXT_TINY: &str = "hello"; | ||
|
||
//---- | ||
|
||
fn hash_large(c: &mut Criterion) { | ||
let mut group = c.benchmark_group("hash_large"); | ||
|
||
group.bench_function("default", |bench| { | ||
let r = Rope::from_str(TEXT); | ||
bench.iter(|| { | ||
let mut hasher = DefaultHasher::default(); | ||
r.hash(black_box(&mut hasher)); | ||
black_box(hasher.finish()); | ||
}) | ||
}); | ||
|
||
group.bench_function("fnv", |bench| { | ||
let r = Rope::from_str(TEXT); | ||
bench.iter(|| { | ||
let mut hasher = FnvHasher::default(); | ||
r.hash(black_box(&mut hasher)); | ||
black_box(hasher.finish()); | ||
}) | ||
}); | ||
|
||
group.bench_function("fxhash", |bench| { | ||
let r = Rope::from_str(TEXT); | ||
bench.iter(|| { | ||
let mut hasher = FxHasher::default(); | ||
r.hash(black_box(&mut hasher)); | ||
black_box(hasher.finish()); | ||
}) | ||
}); | ||
} | ||
|
||
fn hash_small(c: &mut Criterion) { | ||
let mut group = c.benchmark_group("hash_small"); | ||
|
||
group.bench_function("default", |bench| { | ||
let r = Rope::from_str(TEXT_SMALL); | ||
bench.iter(|| { | ||
let mut hasher = DefaultHasher::default(); | ||
r.hash(black_box(&mut hasher)); | ||
black_box(hasher.finish()); | ||
}) | ||
}); | ||
|
||
group.bench_function("fnv", |bench| { | ||
let r = Rope::from_str(TEXT_SMALL); | ||
bench.iter(|| { | ||
let mut hasher = FnvHasher::default(); | ||
r.hash(black_box(&mut hasher)); | ||
black_box(hasher.finish()); | ||
}) | ||
}); | ||
|
||
group.bench_function("fxhash", |bench| { | ||
let r = Rope::from_str(TEXT_SMALL); | ||
bench.iter(|| { | ||
let mut hasher = FxHasher::default(); | ||
r.hash(black_box(&mut hasher)); | ||
black_box(hasher.finish()); | ||
}) | ||
}); | ||
} | ||
|
||
fn hash_tiny(c: &mut Criterion) { | ||
let mut group = c.benchmark_group("hash_tiny"); | ||
|
||
group.bench_function("default", |bench| { | ||
let r = Rope::from_str(TEXT_TINY); | ||
bench.iter(|| { | ||
let mut hasher = DefaultHasher::default(); | ||
r.hash(black_box(&mut hasher)); | ||
black_box(hasher.finish()); | ||
}) | ||
}); | ||
|
||
group.bench_function("fnv", |bench| { | ||
let r = Rope::from_str(TEXT_TINY); | ||
bench.iter(|| { | ||
let mut hasher = FnvHasher::default(); | ||
r.hash(black_box(&mut hasher)); | ||
black_box(hasher.finish()); | ||
}) | ||
}); | ||
|
||
group.bench_function("fxhash", |bench| { | ||
let r = Rope::from_str(TEXT_TINY); | ||
bench.iter(|| { | ||
let mut hasher = FxHasher::default(); | ||
r.hash(black_box(&mut hasher)); | ||
black_box(hasher.finish()); | ||
}) | ||
}); | ||
} | ||
|
||
//---- | ||
|
||
criterion_group!(benches, hash_large, hash_small, hash_tiny,); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
extern crate ropey; | ||
|
||
use std::hash::{Hash, Hasher}; | ||
|
||
use ropey::{Rope, RopeBuilder}; | ||
|
||
const SMALL_TEXT: &str = include_str!("small_ascii.txt"); | ||
|
||
/// An example `Hasher` that demonstrates a property permitted by the
/// documentation but not exploited by the default `Hasher` (SipHash): how
/// the data is split across `write` calls may influence the resulting hash.
///
/// Relevant excerpt from the `Hasher` documentation:
/// > Nor can you assume that adjacent
/// > `write` calls are merged, so it's possible, for example, that
/// > ```
/// > # fn foo(hasher: &mut impl std::hash::Hasher) {
/// > hasher.write(&[1, 2]);
/// > hasher.write(&[3, 4, 5, 6]);
/// > # }
/// > ```
/// > and
/// > ```
/// > # fn foo(hasher: &mut impl std::hash::Hasher) {
/// > hasher.write(&[1, 2, 3, 4]);
/// > hasher.write(&[5, 6]);
/// > # }
/// > ```
/// > end up producing different hashes.
///
/// This hasher forwards the bytes of every `write` call to an inner
/// `DefaultHasher` and then feeds it one extra separator byte (0xFF), so the
/// boundaries between `write` calls become part of the hashed data.
/// While this hasher might seem a little silly, it is perfectly in line with
/// the std documentation. Many other commonly used high performance `Hasher`s
/// (fxhash, ahash, fnvhash) exploit the same property to improve the
/// performance of `write`, so violating this property will cause issues in
/// practice.
#[derive(Default)]
struct TestHasher(std::collections::hash_map::DefaultHasher);

impl Hasher for TestHasher {
    /// Feeds `bytes` to the inner hasher, followed by the 0xFF separator.
    fn write(&mut self, bytes: &[u8]) {
        let inner = &mut self.0;
        inner.write(bytes);
        // Mark the end of this `write` call in the hashed stream.
        inner.write_u8(0xFF);
    }

    /// Returns the hash value reported by the inner hasher.
    fn finish(&self) -> u64 {
        let Self(inner) = self;
        inner.finish()
    }
}
|
||
/// Two ropes holding "Hello world" with different chunk boundaries must
/// produce the same hash under `TestHasher`.
#[test]
#[cfg_attr(miri, ignore)]
fn hash_1() {
    // Assembles a rope from the given chunks, keeping the chunks exactly as given.
    let build = |chunks: &[&str]| {
        let mut builder = RopeBuilder::new();
        for &chunk in chunks {
            builder._append_chunk(chunk);
        }
        builder._finish_no_fix()
    };

    // Hashes a rope with a fresh `TestHasher` and returns the final value.
    let digest = |rope: &Rope| {
        let mut hasher = TestHasher::default();
        rope.hash(&mut hasher);
        hasher.finish()
    };

    // Same contents, different chunk boundaries.
    let r1 = build(&["Hello w", "orld"]);
    let r2 = build(&["Hell", "o world"]);

    assert_eq!(digest(&r1), digest(&r2));
}
|
||
/// Two ropes built from `SMALL_TEXT` with different chunk sizes (5 and 7
/// bytes) must yield line slices that hash identically under `TestHasher`.
#[test]
#[cfg_attr(miri, ignore)]
fn hash_2() {
    // Builds a rope from `SMALL_TEXT` split into byte chunks of `chunk_len`.
    // The `unwrap` panics if a chunk is not valid UTF-8 on its own.
    let build = |chunk_len: usize| {
        let mut b = RopeBuilder::new();
        for chunk in SMALL_TEXT.as_bytes().chunks(chunk_len) {
            b._append_chunk(std::str::from_utf8(chunk).unwrap());
        }
        b._finish_no_fix()
    };

    // Build two ropes with the same contents but different chunk boundaries.
    let r1 = build(5);
    let r2 = build(7);

    // `zip` stops at the shorter iterator, so check the line counts first;
    // otherwise a difference in the number of lines would go unnoticed.
    assert_eq!(r1.len_lines(), r2.len_lines());

    for (l1, l2) in r1.lines().zip(r2.lines()) {
        let mut hasher1 = TestHasher::default();
        let mut hasher2 = TestHasher::default();
        l1.hash(&mut hasher1);
        l2.hash(&mut hasher2);

        assert_eq!(hasher1.finish(), hasher2.finish());
    }
}
|
||
/// Two ropes built from `SMALL_TEXT` with different chunk sizes (521 and 547
/// bytes) must produce the same hash under `TestHasher`.
#[test]
#[cfg_attr(miri, ignore)]
fn hash_3() {
    // Builds a rope from `SMALL_TEXT` split into byte chunks of `chunk_len`.
    let build = |chunk_len: usize| {
        let mut builder = RopeBuilder::new();
        SMALL_TEXT
            .as_bytes()
            .chunks(chunk_len)
            .map(|bytes| std::str::from_utf8(bytes).unwrap())
            .for_each(|piece| builder._append_chunk(piece));
        builder._finish_no_fix()
    };

    // Hashes a rope with a fresh `TestHasher` and returns the final value.
    let digest = |rope: &Rope| {
        let mut hasher = TestHasher::default();
        rope.hash(&mut hasher);
        hasher.finish()
    };

    // Same contents, different chunk boundaries.
    let r1 = build(521);
    let r2 = build(547);

    assert_eq!(digest(&r1), digest(&r2));
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas sit amet tellus | ||
nec turpis feugiat semper. Nam at nulla laoreet, finibus eros sit amet, fringilla | ||
mauris. Fusce vestibulum nec ligula efficitur laoreet. Nunc orci leo, varius eget | ||
ligula vulputate, consequat eleifend nisi. Cras justo purus, imperdiet a augue | ||
malesuada, convallis cursus libero. Fusce pretium arcu in elementum laoreet. Duis | ||
mauris nulla, suscipit at est nec, malesuada pellentesque eros. Quisque semper porta | ||
malesuada. Nunc hendrerit est ac faucibus mollis. Nam fermentum id libero sed | ||
egestas. Duis a accumsan sapien. Nam neque diam, congue non erat et, porta sagittis | ||
turpis. Vivamus vitae mauris sit amet massa mollis molestie. Morbi scelerisque, | ||
augue id congue imperdiet, felis lacus euismod dui, vitae facilisis massa dui quis | ||
sapien. Vivamus hendrerit a urna a lobortis. | ||
|
||
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Maecenas sit amet tellus | ||
nec turpis feugiat semper. Nam at nulla laoreet, finibus eros sit amet, fringilla | ||
mauris. Fusce vestibulum nec ligula efficitur laoreet. Nunc orci leo, varius eget | ||
ligula vulputate, consequat eleifend nisi. Cras justo purus, imperdiet a augue | ||
malesuada, convallis cursus libero. Fusce pretium arcu in elementum laoreet. Duis | ||
mauris nulla, suscipit at est nec, malesuada pellentesque eros. Quisque semper porta | ||
malesuada. Nunc hendrerit est ac faucibus mollis. Nam fermentum id libero sed | ||
egestas. Duis a accumsan sapien. Nam neque diam, congue non erat et, porta sagittis | ||
turpis. Vivamus vitae mauris sit amet massa mollis molestie. Morbi scelerisque, | ||
augue id congue imperdiet, felis lacus euismod dui, vitae facilisis massa dui quis | ||
sapien. Vivamus hendrerit a urna a lobortis. | ||
|