-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Performance optimizations for RandomStringUtils
This commit improves the performance of RandomStringUtils:
* Reduces the number of random bytes generated and the number of calls to the random number generator by using a cache, `AmortizedRandomBits`.
* Optimizes the case of alphanumeric strings by reducing the number of rejections in the rejection sampling.
See the comments in the code for details.
- Loading branch information
Fabrice Benhamouda
committed
Jun 14, 2024
1 parent
23cb811
commit 55a70c8
Showing
4 changed files
with
266 additions
and
2 deletions.
There are no files selected for viewing
92 changes: 92 additions & 0 deletions
92
src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.commons.lang3; | ||
|
||
import java.util.Random; | ||
|
||
/**
 * Generates random integers of a specific bit length from a cache of random bytes.
 *
 * <p>This is more efficient than calling {@code Random.nextInt(1 << nbBits)}: the instance keeps
 * a cache of {@code cacheSize} random bytes and only calls the random source again when every
 * bit of the cache has been consumed. This is especially beneficial for SecureRandom Drbg
 * implementations that incur a constant cost at each randomness generation.
 *
 * <p>Bits inside a cached byte are consumed from the least significant bit upwards; when a
 * request spans several bytes, the bits taken from earlier bytes end up in the higher-order
 * positions of the result.
 *
 * <p>This class is not thread safe.
 *
 * <p>Used internally by RandomStringUtils.
 */
class AmortizedRandomBits {
    /** Number of bits held by one cache entry. */
    private static final int BITS_PER_BYTE = 8;

    /** Source used to (re)fill the cache. */
    private final Random random;

    /** Cached random bytes that are handed out bit by bit. */
    private final byte[] cache;

    // The read position is tracked as (byteIndex, bitOffset) rather than as a single bit counter:
    // a bit counter would need to reach cache.length * 8, which overflows an int for caches
    // larger than Integer.MAX_VALUE / 8 bytes.

    /** Index of the cache byte currently being consumed; equals cache.length when exhausted. */
    private int byteIndex;

    /** Number of low-order bits of {@code cache[byteIndex]} already handed out (0 to 7). */
    private int bitOffset;

    /**
     * Creates a bit generator and immediately fills its cache from {@code random}.
     *
     * @param cacheSize number of bytes cached (only affects performance)
     * @param random random source
     * @throws IllegalArgumentException if {@code cacheSize} is not strictly positive
     */
    AmortizedRandomBits(final int cacheSize, final Random random) {
        if (cacheSize <= 0) {
            throw new IllegalArgumentException("cacheSize must be positive");
        }
        this.cache = new byte[cacheSize];
        this.random = random;
        this.random.nextBytes(this.cache);
        this.byteIndex = 0;
        this.bitOffset = 0;
    }

    /**
     * Returns a random integer built from the next {@code bits} cached random bits.
     *
     * @param bits number of bits to generate, MUST be between 1 and 32
     * @return random integer with {@code bits} bits
     * @throws IllegalArgumentException if {@code bits} is not between 1 and 32
     */
    public int nextBits(final int bits) {
        if (bits > 32 || bits <= 0) {
            throw new IllegalArgumentException("number of bits must be between 1 and 32");
        }

        int result = 0;
        int remaining = bits; // number of bits still to be produced

        while (remaining > 0) {
            if (byteIndex >= cache.length) {
                // Every bit of the cache has been used: a byte is only left behind once it is
                // fully consumed, so the offset must be back at 0 here.
                assert byteIndex == cache.length && bitOffset == 0;
                random.nextBytes(cache);
                byteIndex = 0;
            }

            // Take whatever is left in the current byte, capped by what is still needed.
            final int take = Math.min(BITS_PER_BYTE - bitOffset, remaining);
            final int mask = (1 << take) - 1;

            // Mask to an unsigned byte first so no sign-extension bits can ever leak in.
            final int chunk = ((cache[byteIndex] & 0xFF) >>> bitOffset) & mask;
            result = (result << take) | chunk;

            remaining -= take;
            bitOffset += take;
            if (bitOffset == BITS_PER_BYTE) {
                // Current byte fully consumed: move on to the next one.
                bitOffset = 0;
                byteIndex++;
            }
        }

        return result;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 88 additions & 0 deletions
88
src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.commons.lang3; | ||
|
||
import org.junit.jupiter.params.ParameterizedTest; | ||
import org.junit.jupiter.params.provider.ValueSource; | ||
|
||
import java.util.Random; | ||
|
||
import static org.junit.jupiter.api.Assertions.assertEquals; | ||
import static org.junit.jupiter.api.Assertions.assertThrows; | ||
|
||
public class AmortizedRandomBitsTest { | ||
/** MockRandom mocks a Random class nextBytes to use a specific list of outputs */ | ||
private static class MockRandom extends Random { | ||
private final byte[] outputs; | ||
private int index; | ||
|
||
MockRandom(final byte[] outputs) { | ||
super(); | ||
this.outputs = outputs.clone(); | ||
this.index = 0; | ||
} | ||
|
||
@Override | ||
public void nextBytes(byte[] bytes) { | ||
if (index + bytes.length > outputs.length) { | ||
throw new RuntimeException("not enough outputs given in MockRandom"); | ||
} | ||
System.arraycopy(outputs, index, bytes, 0, bytes.length); | ||
index += bytes.length; | ||
} | ||
} | ||
|
||
@ParameterizedTest | ||
@ValueSource(ints = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 32}) | ||
public void testNext(int cacheSize) { | ||
MockRandom random = new MockRandom(new byte[]{ | ||
0x11, 0x12, 0x13, 0x25, | ||
(byte) 0xab, (byte) 0xcd, (byte) 0xef, (byte) 0xff, | ||
0x55, 0x44, 0x12, 0x34, | ||
0x56, 0x78, 0x00, 0x00, | ||
0x00, 0x00, 0x00, 0x00, | ||
0x00, 0x00, 0x00, 0x00, | ||
0x00, 0x00, 0x00, 0x00, | ||
0x00, 0x00, 0x00, 0x00, | ||
}); | ||
|
||
AmortizedRandomBits arb = new AmortizedRandomBits(cacheSize, random); | ||
|
||
assertThrows(IllegalArgumentException.class, () -> arb.nextBits(0)); | ||
assertThrows(IllegalArgumentException.class, () -> arb.nextBits(33)); | ||
|
||
assertEquals(0x11, arb.nextBits(8)); | ||
assertEquals(0x12, arb.nextBits(8)); | ||
assertEquals(0x1325, arb.nextBits(16)); | ||
|
||
assertEquals((int) 0xabcdefff, arb.nextBits(32)); | ||
|
||
assertEquals(0x5, arb.nextBits(4)); | ||
assertEquals(0x1, arb.nextBits(1)); | ||
assertEquals(0x0, arb.nextBits(1)); | ||
assertEquals(0x1, arb.nextBits(2)); | ||
|
||
assertEquals(0x4, arb.nextBits(6)); | ||
|
||
assertEquals(0x40000000 | (0x12345600 >> 2) | 0x38, arb.nextBits(32)); | ||
|
||
assertEquals(1, arb.nextBits(1)); | ||
assertEquals(0, arb.nextBits(1)); | ||
assertEquals(0, arb.nextBits(9)); | ||
assertEquals(0, arb.nextBits(31)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This class should replace: