Skip to content

Commit

Permalink
GH-524: Poly1305Mac: avoid needless byte copying
Browse files Browse the repository at this point in the history
The update() implementation copied _all_ bytes (successively) first into
an internal 16-byte buffer and then processed that buffer. This is not
needed if the input is long.

Use the internal 16-byte buffer only for inputs shorter than 16 bytes,
or if there is a leftover of less than 16 bytes at the end of a long
input. In between, process 16-byte chunks directly from the input byte
array.

For 32kB inputs this saves us some 2048 calls to System.arraycopy()
copying all those 32kB. The speedup is minimal but noticeable in
benchmarking.

Bug: #524
  • Loading branch information
tomaswolf committed Jul 25, 2024
1 parent f60f759 commit 2b93f5c
Showing 1 changed file with 35 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -95,20 +95,36 @@ public void init(byte[] key) throws Exception {
k1 = unpackIntLE(key, 20);
k2 = unpackIntLE(key, 24);
k3 = unpackIntLE(key, 28);

currentBlockOffset = 0;
}

@Override
public void update(byte[] in, int offset, int length) {
while (length > 0) {
if (currentBlockOffset == BLOCK_SIZE) {
processBlock();
}

if (currentBlockOffset > 0) {
// There is a partially filled block.
int toCopy = Math.min(length, BLOCK_SIZE - currentBlockOffset);
System.arraycopy(in, offset, currentBlock, currentBlockOffset, toCopy);
offset += toCopy;
length -= toCopy;
currentBlockOffset += toCopy;
if (currentBlockOffset == BLOCK_SIZE) {
processBlock(currentBlock, 0, BLOCK_SIZE);
currentBlockOffset = 0;
}
if (length == 0) {
return;
}
}
while (length >= BLOCK_SIZE) {
processBlock(in, offset, BLOCK_SIZE);
offset += BLOCK_SIZE;
length -= BLOCK_SIZE;
}
if (length > 0) {
// Put remaining bytes into internal buffer (length < BLOCK_SIZE here).
System.arraycopy(in, offset, currentBlock, 0, length);
currentBlockOffset = length;
}
}

Expand All @@ -125,7 +141,14 @@ public void doFinal(byte[] out, int offset) throws Exception {
throw new BufferOverflowException();
}
if (currentBlockOffset > 0) {
processBlock();
if (currentBlockOffset < BLOCK_SIZE) {
// padding
currentBlock[currentBlockOffset] = 1;
for (int i = currentBlockOffset + 1; i < BLOCK_SIZE; i++) {
currentBlock[i] = 0;
}
}
processBlock(currentBlock, 0, currentBlockOffset);
}

h1 += h0 >>> 26;
Expand Down Expand Up @@ -179,27 +202,20 @@ public void doFinal(byte[] out, int offset) throws Exception {
reset();
}

private void processBlock() {
if (currentBlockOffset < BLOCK_SIZE) {
// padding
currentBlock[currentBlockOffset] = 1;
for (int i = currentBlockOffset + 1; i < BLOCK_SIZE; i++) {
currentBlock[i] = 0;
}
}
private void processBlock(byte[] block, int offset, int length) {

int t0 = unpackIntLE(currentBlock, 0);
int t1 = unpackIntLE(currentBlock, 4);
int t2 = unpackIntLE(currentBlock, 8);
int t3 = unpackIntLE(currentBlock, 12);
int t0 = unpackIntLE(block, offset);
int t1 = unpackIntLE(block, offset + 4);
int t2 = unpackIntLE(block, offset + 8);
int t3 = unpackIntLE(block, offset + 12);

h0 += t0 & 0x3ffffff;
h1 += (t0 >>> 26 | t1 << 6) & 0x3ffffff;
h2 += (t1 >>> 20 | t2 << 12) & 0x3ffffff;
h3 += (t2 >>> 14 | t3 << 18) & 0x3ffffff;
h4 += t3 >>> 8;

if (currentBlockOffset == BLOCK_SIZE) {
if (length == BLOCK_SIZE) {
h4 += 1 << 24;
}

Expand All @@ -226,8 +242,6 @@ private void processBlock() {
h0 += (int) (tp4 >>> 26) * 5;
h1 += h0 >>> 26;
h0 &= 0x3ffffff;

currentBlockOffset = 0;
}

private void reset() {
Expand Down

0 comments on commit 2b93f5c

Please sign in to comment.