-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve ISAAC performance #36
Changes from all commits
432270d
277b804
87cf51b
74d6b18
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
|
||
use core::num::Wrapping as w; | ||
use core::fmt; | ||
use core::cmp::min; | ||
use {Rng, CryptoRng, SeedFromRng, SeedableRng, Error}; | ||
|
||
#[allow(bad_style)] | ||
|
@@ -34,7 +35,7 @@ const CHACHA_ROUNDS: u32 = 20; // Cryptographically secure from 8 upwards as of | |
pub struct ChaChaRng { | ||
buffer: [w32; STATE_WORDS], // Internal buffer of output | ||
state: [w32; STATE_WORDS], // Initial state | ||
index: usize, // Index into state | ||
index: usize, // Index into state | ||
} | ||
|
||
// Custom Debug implementation that does not expose the internal state | ||
|
@@ -189,6 +190,31 @@ impl ChaChaRng { | |
if self.state[14] != w(0) { return }; | ||
self.state[15] = self.state[15] + w(1); | ||
} | ||
|
||
fn fill_chunk(&mut self, dest: &mut [u8]) -> usize { | ||
if self.index == STATE_WORDS { | ||
self.update(); | ||
} | ||
|
||
let available = (STATE_WORDS - self.index) * 4; | ||
let chunk_size_u8 = min(available, dest.len()); | ||
let chunk_size_u32 = (chunk_size_u8 + 3) / 4; | ||
|
||
// convert to LE: | ||
for ref mut x in self.buffer[self.index..self.index+chunk_size_u32].iter_mut() { | ||
**x = w((*x).0.to_le()); | ||
} | ||
|
||
let buf = unsafe { &*(&mut self.buffer as *mut [w32; STATE_WORDS] | ||
as *mut [u8; STATE_WORDS * 4]) }; | ||
|
||
let index = self.index * 4; | ||
let copy = &mut dest[0..chunk_size_u8]; | ||
copy.copy_from_slice(&buf[index..index+chunk_size_u8]); | ||
|
||
self.index += chunk_size_u32; | ||
chunk_size_u8 | ||
} | ||
} | ||
|
||
impl Rng for ChaChaRng { | ||
|
@@ -211,39 +237,11 @@ impl Rng for ChaChaRng { | |
::rand_core::impls::next_u128_via_u64(self) | ||
} | ||
|
||
// Custom implementation allowing larger reads from buffer is about 8% | ||
// faster than default implementation in my tests | ||
fn fill_bytes(&mut self, dest: &mut [u8]) { | ||
use core::cmp::min; | ||
use core::intrinsics::{transmute, copy_nonoverlapping}; | ||
|
||
let mut left = dest; | ||
while left.len() >= 4 { | ||
if self.index == STATE_WORDS { | ||
self.update(); | ||
} | ||
|
||
let words = min(left.len() / 4, STATE_WORDS - self.index); | ||
let (l, r) = {left}.split_at_mut(4 * words); | ||
left = r; | ||
|
||
// convert to LE: | ||
for ref mut x in self.buffer[self.index..self.index+words].iter_mut() { | ||
**x = w((*x).0.to_le()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think mutating the output buffer from ChaCha is correct... The output buffer is used for the next round. Have to think of something else. I have tried to set up big-endian testing several times, no success yet. Do you happen to know an easy way (that works on Fedora)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops, no it doesn't There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess you need an emulator. If you like I can try to set something up. https://stackoverflow.com/questions/3337896/imitate-emulate-a-big-endian-behavior-in-c There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you think we could get CI? With trust we would get big-endian testing for free. I have not made a pr yet, but last I checked we didn't build on Windows (small fix though). |
||
} | ||
|
||
unsafe{ copy_nonoverlapping( | ||
&self.buffer[self.index].0 as *const u32 as *const u8, | ||
l.as_mut_ptr(), | ||
words) }; | ||
self.index += words; | ||
} | ||
let n = left.len(); | ||
if n > 0 { | ||
let chunk: [u8; 4] = unsafe { | ||
transmute(self.next_u32().to_le()) | ||
}; | ||
left.copy_from_slice(&chunk[..n]); | ||
let mut read_len = 0; | ||
while read_len < dest.len() { | ||
let chunk_len = self.fill_chunk(&mut dest[read_len..]); | ||
read_len += chunk_len; | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ use core::slice; | |
use core::iter::repeat; | ||
use core::num::Wrapping as w; | ||
use core::fmt; | ||
use core::cmp::min; | ||
|
||
use {Rng, SeedFromRng, SeedableRng, Error}; | ||
|
||
|
@@ -87,7 +88,7 @@ const RAND_SIZE: usize = 1 << RAND_SIZE_LEN; | |
/// [3]: Jean-Philippe Aumasson, [*On the pseudo-random generator ISAAC*] | ||
/// (http://eprint.iacr.org/2006/438) | ||
pub struct IsaacRng { | ||
rsl: [w32; RAND_SIZE], | ||
rsl: [u32; RAND_SIZE], | ||
mem: [w32; RAND_SIZE], | ||
a: w32, | ||
b: w32, | ||
|
@@ -175,7 +176,7 @@ impl IsaacRng { | |
let y = *a + *b + ind(&ctx.mem, x, 2); | ||
ctx.mem[base + m] = y; | ||
*b = x + ind(&ctx.mem, y, 2 + RAND_SIZE_LEN); | ||
ctx.rsl[base + m] = *b; | ||
ctx.rsl[base + m] = (*b).0; | ||
} | ||
|
||
let mut m = 0; | ||
|
@@ -200,12 +201,42 @@ impl IsaacRng { | |
self.b = b; | ||
self.cnt = RAND_SIZE as u32; | ||
} | ||
|
||
fn fill_chunk(&mut self, dest: &mut [u8]) -> usize { | ||
if self.cnt < 1 { | ||
self.isaac(); | ||
} | ||
|
||
let mut index_u32 = self.cnt as usize; | ||
let available = index_u32 * 4; | ||
let chunk_size_u8 = min(available, dest.len()); | ||
let chunk_size_u32 = (chunk_size_u8 + 3) / 4; | ||
|
||
index_u32 -= chunk_size_u32; | ||
let index_u8 = index_u32 * 4; | ||
|
||
// convert to LE: | ||
if cfg!(target_endian = "big") { | ||
for ref mut x in self.rsl[index_u32..(index_u32 + chunk_size_u32)].iter_mut() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't need the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought so too, but without it the benchmarks where slower. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I ran them and they were the same for me. Weird. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will test it again. If it is not necessary, it is better without :-) |
||
**x = (*x).to_le(); | ||
} | ||
} | ||
|
||
let rsl = unsafe { &*(&mut self.rsl as *mut [u32; RAND_SIZE] | ||
as *mut [u8; RAND_SIZE * 4]) }; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. again, |
||
|
||
let copy = &mut dest[0..chunk_size_u8]; | ||
copy.copy_from_slice(&rsl[index_u8..(index_u8 + chunk_size_u8)]); | ||
|
||
self.cnt = index_u32 as u32; | ||
chunk_size_u8 | ||
} | ||
} | ||
|
||
impl Rng for IsaacRng { | ||
#[inline] | ||
fn next_u32(&mut self) -> u32 { | ||
if self.cnt == 0 { | ||
if self.cnt < 1 { | ||
// make some more numbers | ||
self.isaac(); | ||
} | ||
|
@@ -222,7 +253,7 @@ impl Rng for IsaacRng { | |
// (the % is cheaply telling the optimiser that we're always | ||
// in bounds, without unsafe. NB. this is a power of two, so | ||
// it optimises to a bitwise mask). | ||
self.rsl[self.cnt as usize % RAND_SIZE].0 | ||
self.rsl[self.cnt as usize % RAND_SIZE] | ||
} | ||
|
||
fn next_u64(&mut self) -> u64 { | ||
|
@@ -235,7 +266,11 @@ impl Rng for IsaacRng { | |
} | ||
|
||
fn fill_bytes(&mut self, dest: &mut [u8]) { | ||
::rand_core::impls::fill_bytes_via_u32(self, dest); | ||
let mut read_len = 0; | ||
while read_len < dest.len() { | ||
let chunk_len = self.fill_chunk(&mut dest[read_len..]); | ||
read_len += chunk_len; | ||
} | ||
} | ||
|
||
fn try_fill(&mut self, dest: &mut [u8]) -> Result<(), Error> { | ||
|
@@ -300,7 +335,7 @@ fn init(mut mem: [w32; RAND_SIZE], rounds: u32) -> IsaacRng { | |
} | ||
|
||
let mut rng = IsaacRng { | ||
rsl: [w(0); RAND_SIZE], | ||
rsl: [0; RAND_SIZE], | ||
mem: mem, | ||
a: w(0), | ||
b: w(0), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can replace
*mut
with*const
here (with&self.buffer
).