Skip to content

Commit

Permalink
Loop unrolling
Browse files Browse the repository at this point in the history
  • Loading branch information
mohanson committed Dec 3, 2024
1 parent c1fc494 commit 964eb96
Show file tree
Hide file tree
Showing 2 changed files with 202 additions and 32 deletions.
125 changes: 109 additions & 16 deletions src/blake2b.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ const BLAKE2B_R2: u32 = 24;
const BLAKE2B_R3: u32 = 16;
const BLAKE2B_R4: u32 = 63;

/// Interpretation of bytes as words.
/// Interpretation of bytes as words. On little-endian platforms, Rust will automatically optimize this function.
fn interp_hb2w(b: &[u8; BLAKE2B_NN]) -> [u64; BLAKE2B_NN / 8] {
let mut w = [0; BLAKE2B_NN / 8];
let mut u = [0; 8];
Expand All @@ -49,7 +49,7 @@ fn interp_hb2w(b: &[u8; BLAKE2B_NN]) -> [u64; BLAKE2B_NN / 8] {
w
}

/// Interpretation of words as bytes.
/// Interpretation of words as bytes. On little-endian platforms, Rust will automatically optimize this function.
fn interp_hw2b(w: &[u64; BLAKE2B_NN / 8]) -> [u8; BLAKE2B_NN] {
let mut b = [0; BLAKE2B_NN];
for i in 0..w.len() {
Expand All @@ -58,7 +58,7 @@ fn interp_hw2b(w: &[u64; BLAKE2B_NN / 8]) -> [u8; BLAKE2B_NN] {
b
}

/// Interpretation of bytes as words.
/// Interpretation of bytes as words. On little-endian platforms, Rust will automatically optimize this function.
fn interp_bb2w(b: &[u8; BLAKE2B_BB]) -> [u64; BLAKE2B_BB / 8] {
let mut w = [0; BLAKE2B_BB / 8];
let mut u = [0; 8];
Expand Down Expand Up @@ -94,19 +94,112 @@ fn reduce(h: &mut [u64; 8], m: &[u64; 16], t: &[u64; 2], f: &[u64; 2]) {
v[0x0d] ^= t[1];
v[0x0e] ^= f[0];
v[0x0f] ^= f[1];
for s in BLAKE2B_SIGMA {
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[s[0x0] as usize], m[s[0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[s[0x2] as usize], m[s[0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[s[0x4] as usize], m[s[0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[s[0x6] as usize], m[s[0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[s[0x8] as usize], m[s[0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[s[0xa] as usize], m[s[0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[s[0xc] as usize], m[s[0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[s[0xe] as usize], m[s[0xf] as usize]);
}
for i in 0..8 {
h[i] = h[i] ^ v[i] ^ v[i + 8]
}

mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0x0][0x0] as usize], m[BLAKE2B_SIGMA[0x0][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0x0][0x2] as usize], m[BLAKE2B_SIGMA[0x0][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0x0][0x4] as usize], m[BLAKE2B_SIGMA[0x0][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0x0][0x6] as usize], m[BLAKE2B_SIGMA[0x0][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0x0][0x8] as usize], m[BLAKE2B_SIGMA[0x0][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0x0][0xa] as usize], m[BLAKE2B_SIGMA[0x0][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0x0][0xc] as usize], m[BLAKE2B_SIGMA[0x0][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0x0][0xe] as usize], m[BLAKE2B_SIGMA[0x0][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0x1][0x0] as usize], m[BLAKE2B_SIGMA[0x1][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0x1][0x2] as usize], m[BLAKE2B_SIGMA[0x1][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0x1][0x4] as usize], m[BLAKE2B_SIGMA[0x1][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0x1][0x6] as usize], m[BLAKE2B_SIGMA[0x1][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0x1][0x8] as usize], m[BLAKE2B_SIGMA[0x1][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0x1][0xa] as usize], m[BLAKE2B_SIGMA[0x1][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0x1][0xc] as usize], m[BLAKE2B_SIGMA[0x1][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0x1][0xe] as usize], m[BLAKE2B_SIGMA[0x1][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0x2][0x0] as usize], m[BLAKE2B_SIGMA[0x2][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0x2][0x2] as usize], m[BLAKE2B_SIGMA[0x2][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0x2][0x4] as usize], m[BLAKE2B_SIGMA[0x2][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0x2][0x6] as usize], m[BLAKE2B_SIGMA[0x2][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0x2][0x8] as usize], m[BLAKE2B_SIGMA[0x2][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0x2][0xa] as usize], m[BLAKE2B_SIGMA[0x2][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0x2][0xc] as usize], m[BLAKE2B_SIGMA[0x2][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0x2][0xe] as usize], m[BLAKE2B_SIGMA[0x2][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0x3][0x0] as usize], m[BLAKE2B_SIGMA[0x3][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0x3][0x2] as usize], m[BLAKE2B_SIGMA[0x3][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0x3][0x4] as usize], m[BLAKE2B_SIGMA[0x3][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0x3][0x6] as usize], m[BLAKE2B_SIGMA[0x3][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0x3][0x8] as usize], m[BLAKE2B_SIGMA[0x3][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0x3][0xa] as usize], m[BLAKE2B_SIGMA[0x3][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0x3][0xc] as usize], m[BLAKE2B_SIGMA[0x3][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0x3][0xe] as usize], m[BLAKE2B_SIGMA[0x3][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0x4][0x0] as usize], m[BLAKE2B_SIGMA[0x4][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0x4][0x2] as usize], m[BLAKE2B_SIGMA[0x4][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0x4][0x4] as usize], m[BLAKE2B_SIGMA[0x4][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0x4][0x6] as usize], m[BLAKE2B_SIGMA[0x4][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0x4][0x8] as usize], m[BLAKE2B_SIGMA[0x4][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0x4][0xa] as usize], m[BLAKE2B_SIGMA[0x4][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0x4][0xc] as usize], m[BLAKE2B_SIGMA[0x4][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0x4][0xe] as usize], m[BLAKE2B_SIGMA[0x4][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0x5][0x0] as usize], m[BLAKE2B_SIGMA[0x5][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0x5][0x2] as usize], m[BLAKE2B_SIGMA[0x5][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0x5][0x4] as usize], m[BLAKE2B_SIGMA[0x5][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0x5][0x6] as usize], m[BLAKE2B_SIGMA[0x5][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0x5][0x8] as usize], m[BLAKE2B_SIGMA[0x5][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0x5][0xa] as usize], m[BLAKE2B_SIGMA[0x5][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0x5][0xc] as usize], m[BLAKE2B_SIGMA[0x5][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0x5][0xe] as usize], m[BLAKE2B_SIGMA[0x5][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0x6][0x0] as usize], m[BLAKE2B_SIGMA[0x6][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0x6][0x2] as usize], m[BLAKE2B_SIGMA[0x6][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0x6][0x4] as usize], m[BLAKE2B_SIGMA[0x6][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0x6][0x6] as usize], m[BLAKE2B_SIGMA[0x6][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0x6][0x8] as usize], m[BLAKE2B_SIGMA[0x6][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0x6][0xa] as usize], m[BLAKE2B_SIGMA[0x6][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0x6][0xc] as usize], m[BLAKE2B_SIGMA[0x6][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0x6][0xe] as usize], m[BLAKE2B_SIGMA[0x6][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0x7][0x0] as usize], m[BLAKE2B_SIGMA[0x7][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0x7][0x2] as usize], m[BLAKE2B_SIGMA[0x7][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0x7][0x4] as usize], m[BLAKE2B_SIGMA[0x7][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0x7][0x6] as usize], m[BLAKE2B_SIGMA[0x7][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0x7][0x8] as usize], m[BLAKE2B_SIGMA[0x7][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0x7][0xa] as usize], m[BLAKE2B_SIGMA[0x7][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0x7][0xc] as usize], m[BLAKE2B_SIGMA[0x7][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0x7][0xe] as usize], m[BLAKE2B_SIGMA[0x7][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0x8][0x0] as usize], m[BLAKE2B_SIGMA[0x8][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0x8][0x2] as usize], m[BLAKE2B_SIGMA[0x8][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0x8][0x4] as usize], m[BLAKE2B_SIGMA[0x8][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0x8][0x6] as usize], m[BLAKE2B_SIGMA[0x8][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0x8][0x8] as usize], m[BLAKE2B_SIGMA[0x8][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0x8][0xa] as usize], m[BLAKE2B_SIGMA[0x8][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0x8][0xc] as usize], m[BLAKE2B_SIGMA[0x8][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0x8][0xe] as usize], m[BLAKE2B_SIGMA[0x8][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0x9][0x0] as usize], m[BLAKE2B_SIGMA[0x9][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0x9][0x2] as usize], m[BLAKE2B_SIGMA[0x9][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0x9][0x4] as usize], m[BLAKE2B_SIGMA[0x9][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0x9][0x6] as usize], m[BLAKE2B_SIGMA[0x9][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0x9][0x8] as usize], m[BLAKE2B_SIGMA[0x9][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0x9][0xa] as usize], m[BLAKE2B_SIGMA[0x9][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0x9][0xc] as usize], m[BLAKE2B_SIGMA[0x9][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0x9][0xe] as usize], m[BLAKE2B_SIGMA[0x9][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0xa][0x0] as usize], m[BLAKE2B_SIGMA[0xa][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0xa][0x2] as usize], m[BLAKE2B_SIGMA[0xa][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0xa][0x4] as usize], m[BLAKE2B_SIGMA[0xa][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0xa][0x6] as usize], m[BLAKE2B_SIGMA[0xa][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0xa][0x8] as usize], m[BLAKE2B_SIGMA[0xa][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0xa][0xa] as usize], m[BLAKE2B_SIGMA[0xa][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0xa][0xc] as usize], m[BLAKE2B_SIGMA[0xa][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0xa][0xe] as usize], m[BLAKE2B_SIGMA[0xa][0xf] as usize]);
mixing(&mut v, 0x0, 0x4, 0x8, 0xc, m[BLAKE2B_SIGMA[0xb][0x0] as usize], m[BLAKE2B_SIGMA[0xb][0x1] as usize]);
mixing(&mut v, 0x1, 0x5, 0x9, 0xd, m[BLAKE2B_SIGMA[0xb][0x2] as usize], m[BLAKE2B_SIGMA[0xb][0x3] as usize]);
mixing(&mut v, 0x2, 0x6, 0xa, 0xe, m[BLAKE2B_SIGMA[0xb][0x4] as usize], m[BLAKE2B_SIGMA[0xb][0x5] as usize]);
mixing(&mut v, 0x3, 0x7, 0xb, 0xf, m[BLAKE2B_SIGMA[0xb][0x6] as usize], m[BLAKE2B_SIGMA[0xb][0x7] as usize]);
mixing(&mut v, 0x0, 0x5, 0xa, 0xf, m[BLAKE2B_SIGMA[0xb][0x8] as usize], m[BLAKE2B_SIGMA[0xb][0x9] as usize]);
mixing(&mut v, 0x1, 0x6, 0xb, 0xc, m[BLAKE2B_SIGMA[0xb][0xa] as usize], m[BLAKE2B_SIGMA[0xb][0xb] as usize]);
mixing(&mut v, 0x2, 0x7, 0x8, 0xd, m[BLAKE2B_SIGMA[0xb][0xc] as usize], m[BLAKE2B_SIGMA[0xb][0xd] as usize]);
mixing(&mut v, 0x3, 0x4, 0x9, 0xe, m[BLAKE2B_SIGMA[0xb][0xe] as usize], m[BLAKE2B_SIGMA[0xb][0xf] as usize]);

h[0x0] = h[0x0] ^ v[0x0] ^ v[0x8];
h[0x1] = h[0x1] ^ v[0x1] ^ v[0x9];
h[0x2] = h[0x2] ^ v[0x2] ^ v[0xa];
h[0x3] = h[0x3] ^ v[0x3] ^ v[0xb];
h[0x4] = h[0x4] ^ v[0x4] ^ v[0xc];
h[0x5] = h[0x5] ^ v[0x5] ^ v[0xd];
h[0x6] = h[0x6] ^ v[0x6] ^ v[0xe];
h[0x7] = h[0x7] ^ v[0x7] ^ v[0xf];
}

/// Add n to message byte offset.
Expand Down
Loading

0 comments on commit 964eb96

Please sign in to comment.