From 23cb81109d589b6e415c161be6e37023ab29109d Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Thu, 13 Jun 2024 17:05:50 -0400 Subject: [PATCH 1/5] Reimplement RandomStringUtils on top of SecureRandom#getInstanceStrong() The previous implementation used ThreadLocalRandom#current() --- .../commons/lang3/RandomStringUtils.java | 64 +++++++++++-------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/apache/commons/lang3/RandomStringUtils.java b/src/main/java/org/apache/commons/lang3/RandomStringUtils.java index 9a05d74e598..ea7ca0ac97e 100644 --- a/src/main/java/org/apache/commons/lang3/RandomStringUtils.java +++ b/src/main/java/org/apache/commons/lang3/RandomStringUtils.java @@ -16,44 +16,56 @@ */ package org.apache.commons.lang3; +import java.security.NoSuchAlgorithmException; +import java.security.SecureRandom; import java.util.Random; import java.util.concurrent.ThreadLocalRandom; +import org.apache.commons.lang3.exception.UncheckedException; + /** * Generates random {@link String}s. + *

+ * Starting in version 3.15.0, this classes uses {@link SecureRandom#getInstanceStrong()}. + *

+ *

+ * Before version 3.15.0, this classes used {@link ThreadLocalRandom#current()}, which was NOT cryptographically secure. + *

+ *

+ * RandomStringUtils is intended for simple use cases. For more advanced use cases consider using Apache Commons Text's + * RandomStringGenerator + * instead. + *

+ *

+ * The Apache Commons project provides Commons RNG dedicated to pseudo-random number generation, + * that may be a better choice for applications with more stringent requirements (performance and/or correctness). + *

+ *

+ * Note that private high surrogate characters are ignored. These are Unicode characters that fall between the values 56192 (db80) and 56319 (dbff) as + * we don't know how to handle them. High and low surrogates are correctly dealt with - that is if a high surrogate is randomly chosen, 55296 (d800) to 56191 + * (db7f) then it is followed by a low surrogate. If a low surrogate is chosen, 56320 (dc00) to 57343 (dfff) then it is placed after a randomly chosen high + * surrogate. + *

+ *

+ * #ThreadSafe# + *

* - *

Caveat: Instances of {@link Random}, upon which the implementation of this - * class relies, are not cryptographically secure.

- * - *

RandomStringUtils is intended for simple use cases. For more advanced - * use cases consider using Apache Commons Text's - * - * RandomStringGenerator instead.

- * - *

The Apache Commons project provides - * Commons RNG dedicated to pseudo-random number generation, that may be - * a better choice for applications with more stringent requirements - * (performance and/or correctness).

- * - *

Note that private high surrogate characters are ignored. - * These are Unicode characters that fall between the values 56192 (db80) - * and 56319 (dbff) as we don't know how to handle them. - * High and low surrogates are correctly dealt with - that is if a - * high surrogate is randomly chosen, 55296 (d800) to 56191 (db7f) - * then it is followed by a low surrogate. If a low surrogate is chosen, - * 56320 (dc00) to 57343 (dfff) then it is placed after a randomly - * chosen high surrogate.

- * - *

#ThreadSafe#

* @since 1.0 */ public class RandomStringUtils { - private static ThreadLocalRandom random() { - return ThreadLocalRandom.current(); + private static final ThreadLocal RANDOM = ThreadLocal.withInitial(() -> { + try { + return SecureRandom.getInstanceStrong(); + } catch (NoSuchAlgorithmException e) { + throw new UncheckedException(e); + } + }); + + private static SecureRandom random() { + return RANDOM.get(); } - // Random /** * Creates a random string whose length is the number of characters * specified. From 55a70c84f3658fd3ad3de72649b511ff21dc7175 Mon Sep 17 00:00:00 2001 From: Fabrice Benhamouda Date: Fri, 14 Jun 2024 18:05:28 -0400 Subject: [PATCH 2/5] Performance optimizations for RandomStringUtils This commit improves the performance of RandomStringUtils: * Reduces the number of random bytes generated and the number of calls to the random number generator, by using a cache system `AmortizedRandomBits`. * Optimizes the case of alphanumerical strings, reducing the number of rejections in the rejection sampling. See comments in code for details. --- .../commons/lang3/AmortizedRandomBits.java | 92 +++++++++++++++++++ .../commons/lang3/RandomStringUtils.java | 63 ++++++++++++- .../lang3/AmortizedRandomBitsTest.java | 88 ++++++++++++++++++ .../commons/lang3/RandomStringUtilsTest.java | 25 +++++ 4 files changed, 266 insertions(+), 2 deletions(-) create mode 100644 src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java create mode 100644 src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java diff --git a/src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java b/src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java new file mode 100644 index 00000000000..45e8e3675e8 --- /dev/null +++ b/src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.lang3; + +import java.util.Random; + +/** + * AmortizedRandomBits enable to generate random integers of specific bit length. + * + *

It is more efficient way than calling Random.nextInt(1 << nbBits). It uses a cache of + * cacheSize random bytes that it replenishes when it gets empty. This is especially beneficial for + * SecureRandom Drbg implementations that incur a constant cost at each randomness generation. It is + * not thread safe. + * + *

Used internally by RandomStringUtils. + */ +class AmortizedRandomBits { + private final Random random; + + private final byte[] cache; + + // bitIndex is the index of the next bit in the cache to be used + // bitIndex=0 means the cache is fully random and none of the bits have been used yet + // bitIndex=1 means that only the LSB of cache[0] has been used and all other bits can be used + // bitIndex=8 means that only the 8 bits of cache[0] has been used + private int bitIndex; + + /** + * @param cacheSize number of bytes cached (only affects performance) + * @param random random source + */ + AmortizedRandomBits(final int cacheSize, final Random random) { + if (cacheSize <= 0) { + throw new IllegalArgumentException("cacheSize must be positive"); + } + this.cache = new byte[cacheSize]; + this.random = random; + this.random.nextBytes(this.cache); + this.bitIndex = 0; + } + + /** + * nextBits returns a random integer with the number of bits specified + * + * @param bits number of bits to generate, MUST be between 1 and 32 + * @return random integer with {@code bits} bits + */ + public int nextBits(final int bits) { + if (bits > 32 || bits <= 0) { + throw new IllegalArgumentException("number of bits must be between 1 and 32"); + } + + int result = 0; + int generatedBits = 0; // number of generated bits up to now + + while (generatedBits < bits) { + if (bitIndex / 8 >= cache.length) { + // we exhausted the number of bits in the cache + // this should only happen if the bitIndex is exactly matching the cache length + assert bitIndex == cache.length * 8; + random.nextBytes(cache); + bitIndex = 0; + } + + // generatedBitsInIteration is the number of bits that we will generate + // in this iteration of the while loop + int generatedBitsInIteration = Math.min(8 - (bitIndex % 8), bits - generatedBits); + + result = result << generatedBitsInIteration; + result |= (cache[bitIndex / 8] >> (bitIndex % 8)) & ((1 << generatedBitsInIteration) - 1); + + generatedBits += generatedBitsInIteration; + bitIndex += generatedBitsInIteration; + } + + return result; + } +} diff --git a/src/main/java/org/apache/commons/lang3/RandomStringUtils.java b/src/main/java/org/apache/commons/lang3/RandomStringUtils.java index ea7ca0ac97e..fdeb66f54f0 100644 --- a/src/main/java/org/apache/commons/lang3/RandomStringUtils.java +++ b/src/main/java/org/apache/commons/lang3/RandomStringUtils.java @@ -66,6 +66,14 @@ private static SecureRandom random() { return RANDOM.get(); } + private static final char[] ALPHANUMERICAL_CHARS = { + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' + }; + /** * Creates a random string whose length is the number of characters * specified. @@ -226,6 +234,42 @@ public static String random(int count, int start, int end, final boolean letters throw new IllegalArgumentException("Parameter end (" + end + ") must be greater than start (" + start + ")"); } + if (end > Character.MAX_CODE_POINT) { + // Technically, it should be `Character.MAX_CODE_POINT+1` as `end` is excluded + // But the character `Character.MAX_CODE_POINT` is private use, so it would anyway be excluded + end = Character.MAX_CODE_POINT; + } + + // Optimize generation of full alphanumerical characters + // Normally, we would need to pick a 7-bit integer, since gap = 'z' - '0' + 1 = 75 > 64 + // In turn, this would make us reject the sampling with probability 1 - 62 / 2^7 > 1 / 2 + // Instead we can pick directly from the right set of 62 characters, which requires + // picking a 6-bit integer and only rejecting with probability 2 / 64 = 1 / 32 + if (chars == null && letters && numbers && start <= '0' && end >= 'z' + 1) { + return random(count, 0, 0, false, false, ALPHANUMERICAL_CHARS, random); + } + + // Optimize start and end when filtering by letters and/or numbers: + // The range provided may be too large since we filter anyway afterward. + // Note the use of Math.min/max (as opposed to setting start to '0' for example), + // since it is possible the range start/end excludes some of the letters/numbers, + // e.g., it is possible that start already is '1' when numbers = true, and start + // needs to stay equal to '1' in that case. + if (chars == null) { + if (letters && numbers) { + start = Math.max('0', start); + end = Math.min('z' + 1, end); + } else if (numbers) { + // just numbers, no letters + start = Math.max('0', start); + end = Math.min('9' + 1, end); + } else if (letters) { + // just letters, no numbers + start = Math.max('A', start); + end = Math.min('z' + 1, end); + } + } + final int zeroDigitAscii = 48; final int firstLetterAscii = 65; @@ -237,11 +281,26 @@ public static String random(int count, int start, int end, final boolean letters final StringBuilder builder = new StringBuilder(count); final int gap = end - start; + final int gapBits = Integer.SIZE - Integer.numberOfLeadingZeros(gap); + // The size of the cache we use is an heuristic: + // about twice the number of bytes required if no rejection + // Ideally the cache size depends on multiple factor, including the cost of generating x bytes + // of randomness as well as the probability of rejection. It is however not easy to know + // those values programmatically for the general case. + final AmortizedRandomBits arb = new AmortizedRandomBits((count * gapBits + 3) / 5 + 10, random); while (count-- != 0) { + // Generate a random value between start (included) and end (excluded) + final int randomValue = arb.nextBits(gapBits) + start; + // Rejection sampling if value too large + if (randomValue >= end) { + count++; + continue; + } + final int codePoint; if (chars == null) { - codePoint = random.nextInt(gap) + start; + codePoint = randomValue; switch (Character.getType(codePoint)) { case Character.UNASSIGNED: @@ -252,7 +311,7 @@ public static String random(int count, int start, int end, final boolean letters } } else { - codePoint = chars[random.nextInt(gap) + start]; + codePoint = chars[randomValue]; } final int numberOfChars = Character.charCount(codePoint); diff --git a/src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java b/src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java new file mode 100644 index 00000000000..38766649093 --- /dev/null +++ b/src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.lang3; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.util.Random; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class AmortizedRandomBitsTest { + /** MockRandom mocks a Random class nextBytes to use a specific list of outputs */ + private static class MockRandom extends Random { + private final byte[] outputs; + private int index; + + MockRandom(final byte[] outputs) { + super(); + this.outputs = outputs.clone(); + this.index = 0; + } + + @Override + public void nextBytes(byte[] bytes) { + if (index + bytes.length > outputs.length) { + throw new RuntimeException("not enough outputs given in MockRandom"); + } + System.arraycopy(outputs, index, bytes, 0, bytes.length); + index += bytes.length; + } + } + + @ParameterizedTest + @ValueSource(ints = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 32}) + public void testNext(int cacheSize) { + MockRandom random = new MockRandom(new byte[]{ + 0x11, 0x12, 0x13, 0x25, + (byte) 0xab, (byte) 0xcd, (byte) 0xef, (byte) 0xff, + 0x55, 0x44, 0x12, 0x34, + 0x56, 0x78, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + }); + + AmortizedRandomBits arb = new AmortizedRandomBits(cacheSize, random); + + assertThrows(IllegalArgumentException.class, () -> arb.nextBits(0)); + assertThrows(IllegalArgumentException.class, () -> arb.nextBits(33)); + + assertEquals(0x11, arb.nextBits(8)); + assertEquals(0x12, arb.nextBits(8)); + assertEquals(0x1325, arb.nextBits(16)); + + assertEquals((int) 0xabcdefff, arb.nextBits(32)); + + assertEquals(0x5, arb.nextBits(4)); + assertEquals(0x1, arb.nextBits(1)); + assertEquals(0x0, arb.nextBits(1)); + assertEquals(0x1, arb.nextBits(2)); + + assertEquals(0x4, arb.nextBits(6)); + + assertEquals(0x40000000 | (0x12345600 >> 2) | 0x38, arb.nextBits(32)); + + assertEquals(1, arb.nextBits(1)); + assertEquals(0, arb.nextBits(1)); + assertEquals(0, arb.nextBits(9)); + assertEquals(0, arb.nextBits(31)); + } +} diff --git a/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java b/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java index b4b8d6caeae..876a0d1463a 100644 --- a/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java +++ b/src/test/java/org/apache/commons/lang3/RandomStringUtilsTest.java @@ -27,6 +27,7 @@ import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.fail; import java.lang.reflect.Constructor; @@ -518,4 +519,28 @@ public void testRandomStringUtilsHomog() { // critical value: from scipy.stats import chi2; chi2(2).isf(1e-5) assertThat("test homogeneity -- will fail about 1 in 100,000 times", chiSquare(expected, counts), lessThan(23.025850929940457d)); } + + /** + * Test {@code RandomStringUtils.random} works appropriately when chars specified. + */ + @Test + void testRandomWithChars() { + final char[] digitChars = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}; + + String r1, r2, r3; + + r1 = RandomStringUtils.random(50, 0, 0, true, true, digitChars); + assertEquals(50, r1.length(), "randomNumeric(50)"); + for (int i = 0; i < r1.length(); i++) { + assertTrue( + Character.isDigit(r1.charAt(i)) && !Character.isLetter(r1.charAt(i)), + "r1 contains numeric"); + } + r2 = RandomStringUtils.randomNumeric(50); + assertNotEquals(r1, r2); + + r3 = RandomStringUtils.random(50, 0, 0, true, true, digitChars); + assertNotEquals(r1, r3); + assertNotEquals(r2, r3); + } } From cf5494597a6b024b48ec91a0d6f9a675fb418515 Mon Sep 17 00:00:00 2001 From: Fabrice Benhamouda Date: Mon, 17 Jun 2024 17:20:03 -0400 Subject: [PATCH 3/5] Code style and comment improvements * Fix 2 checkstyle errors. * Apply suggestions from review for garydgregory/commons-lang#2 * Improve comments in new (non-public) class `AmortizedRandomBits` to match comments in other classes. --- .../commons/lang3/AmortizedRandomBits.java | 37 +++++++++++++------ .../lang3/AmortizedRandomBitsTest.java | 11 ++++-- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java b/src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java index 45e8e3675e8..42141b617e9 100644 --- a/src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java +++ b/src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java @@ -19,27 +19,42 @@ import java.util.Random; /** - * AmortizedRandomBits enable to generate random integers of specific bit length. + * Generates random integers of specific bit length. * - *

It is more efficient way than calling Random.nextInt(1 << nbBits). It uses a cache of - * cacheSize random bytes that it replenishes when it gets empty. This is especially beneficial for - * SecureRandom Drbg implementations that incur a constant cost at each randomness generation. It is - * not thread safe. + *

+ * It is more efficient than calling Random.nextInt(1 << nbBits). + * It uses a cache of cacheSize random bytes that it replenishes when it gets empty. + * This is especially beneficial for SecureRandom Drbg implementations, + * which incur a constant cost at each randomness generation. + *

* - *

Used internally by RandomStringUtils. + *

+ * Used internally by RandomStringUtils. + *

+ * + *

+ * #NotThreadSafe# + *

*/ class AmortizedRandomBits { private final Random random; private final byte[] cache; - // bitIndex is the index of the next bit in the cache to be used - // bitIndex=0 means the cache is fully random and none of the bits have been used yet - // bitIndex=1 means that only the LSB of cache[0] has been used and all other bits can be used - // bitIndex=8 means that only the 8 bits of cache[0] has been used + /** + * Index of the next bit in the cache to be used. + * + *
    + *
  • bitIndex=0 means the cache is fully random and none of the bits have been used yet.
  • + *
  • bitIndex=1 means that only the LSB of cache[0] has been used and all other bits can be used.
  • + *
  • bitIndex=8 means that only the 8 bits of cache[0] has been used.
  • + *
+ */ private int bitIndex; /** + * Creates an AmortizedRandomBits instance. + * * @param cacheSize number of bytes cached (only affects performance) * @param random random source */ @@ -54,7 +69,7 @@ class AmortizedRandomBits { } /** - * nextBits returns a random integer with the number of bits specified + * Generates a random integer with the specified number of bits. * * @param bits number of bits to generate, MUST be between 1 and 32 * @return random integer with {@code bits} bits diff --git a/src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java b/src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java index 38766649093..e809d2e6ebd 100644 --- a/src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java +++ b/src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java @@ -16,14 +16,17 @@ */ package org.apache.commons.lang3; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.ValueSource; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; import java.util.Random; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertThrows; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +/** + * Tests {@link AmortizedRandomBits}. + */ public class AmortizedRandomBitsTest { /** MockRandom mocks a Random class nextBytes to use a specific list of outputs */ private static class MockRandom extends Random { From 55ef38026f079050c7b51b2c81f09bc6dc57bc49 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Wed, 3 Jul 2024 18:25:55 -0400 Subject: [PATCH 4/5] Make class final - Rename package-private class - Whitespace - Add null check - Add serialVersionUID - Remove redunant type cast - Throw IllegalStateException, not RuntimeException - nextBytes() should throw NullPointerException per contract - Javadoc: Use longer lines --- ...dRandomBits.java => CachedRandomBits.java} | 22 +++++++------------ .../commons/lang3/RandomStringUtils.java | 2 +- ...itsTest.java => CachedRandomBitsTest.java} | 15 ++++++++----- 3 files changed, 19 insertions(+), 20 deletions(-) rename src/main/java/org/apache/commons/lang3/{AmortizedRandomBits.java => CachedRandomBits.java} (88%) rename src/test/java/org/apache/commons/lang3/{AmortizedRandomBitsTest.java => CachedRandomBitsTest.java} (87%) diff --git a/src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java b/src/main/java/org/apache/commons/lang3/CachedRandomBits.java similarity index 88% rename from src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java rename to src/main/java/org/apache/commons/lang3/CachedRandomBits.java index 42141b617e9..6bdeaddd887 100644 --- a/src/main/java/org/apache/commons/lang3/AmortizedRandomBits.java +++ b/src/main/java/org/apache/commons/lang3/CachedRandomBits.java @@ -16,16 +16,15 @@ */ package org.apache.commons.lang3; +import java.util.Objects; import java.util.Random; /** * Generates random integers of specific bit length. * *

- * It is more efficient than calling Random.nextInt(1 << nbBits). - * It uses a cache of cacheSize random bytes that it replenishes when it gets empty. - * This is especially beneficial for SecureRandom Drbg implementations, - * which incur a constant cost at each randomness generation. + * It is more efficient than calling Random.nextInt(1 << nbBits). It uses a cache of cacheSize random bytes that it replenishes when it gets empty. This is + * especially beneficial for SecureRandom Drbg implementations, which incur a constant cost at each randomness generation. *

* *

@@ -36,7 +35,8 @@ * #NotThreadSafe# *

*/ -class AmortizedRandomBits { +final class CachedRandomBits { + private final Random random; private final byte[] cache; @@ -53,17 +53,17 @@ class AmortizedRandomBits { private int bitIndex; /** - * Creates an AmortizedRandomBits instance. + * Creates a new instance. * * @param cacheSize number of bytes cached (only affects performance) * @param random random source */ - AmortizedRandomBits(final int cacheSize, final Random random) { + CachedRandomBits(final int cacheSize, final Random random) { if (cacheSize <= 0) { throw new IllegalArgumentException("cacheSize must be positive"); } this.cache = new byte[cacheSize]; - this.random = random; + this.random = Objects.requireNonNull(random, "random"); this.random.nextBytes(this.cache); this.bitIndex = 0; } @@ -78,10 +78,8 @@ public int nextBits(final int bits) { if (bits > 32 || bits <= 0) { throw new IllegalArgumentException("number of bits must be between 1 and 32"); } - int result = 0; int generatedBits = 0; // number of generated bits up to now - while (generatedBits < bits) { if (bitIndex / 8 >= cache.length) { // we exhausted the number of bits in the cache @@ -90,18 +88,14 @@ public int nextBits(final int bits) { random.nextBytes(cache); bitIndex = 0; } - // generatedBitsInIteration is the number of bits that we will generate // in this iteration of the while loop int generatedBitsInIteration = Math.min(8 - (bitIndex % 8), bits - generatedBits); - result = result << generatedBitsInIteration; result |= (cache[bitIndex / 8] >> (bitIndex % 8)) & ((1 << generatedBitsInIteration) - 1); - generatedBits += generatedBitsInIteration; bitIndex += generatedBitsInIteration; } - return result; } } diff --git a/src/main/java/org/apache/commons/lang3/RandomStringUtils.java b/src/main/java/org/apache/commons/lang3/RandomStringUtils.java index fdeb66f54f0..419e5f0c552 100644 --- a/src/main/java/org/apache/commons/lang3/RandomStringUtils.java +++ b/src/main/java/org/apache/commons/lang3/RandomStringUtils.java @@ -287,7 +287,7 @@ public static String random(int count, int start, int end, final boolean letters // Ideally the cache size depends on multiple factor, including the cost of generating x bytes // of randomness as well as the probability of rejection. It is however not easy to know // those values programmatically for the general case. - final AmortizedRandomBits arb = new AmortizedRandomBits((count * gapBits + 3) / 5 + 10, random); + final CachedRandomBits arb = new CachedRandomBits((count * gapBits + 3) / 5 + 10, random); while (count-- != 0) { // Generate a random value between start (included) and end (excluded) diff --git a/src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java b/src/test/java/org/apache/commons/lang3/CachedRandomBitsTest.java similarity index 87% rename from src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java rename to src/test/java/org/apache/commons/lang3/CachedRandomBitsTest.java index e809d2e6ebd..6f8c8c9510a 100644 --- a/src/test/java/org/apache/commons/lang3/AmortizedRandomBitsTest.java +++ b/src/test/java/org/apache/commons/lang3/CachedRandomBitsTest.java @@ -19,17 +19,21 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import java.util.Objects; import java.util.Random; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.ValueSource; /** - * Tests {@link AmortizedRandomBits}. + * Tests {@link CachedRandomBits}. */ -public class AmortizedRandomBitsTest { +public class CachedRandomBitsTest { + /** MockRandom mocks a Random class nextBytes to use a specific list of outputs */ private static class MockRandom extends Random { + + private static final long serialVersionUID = 1L; private final byte[] outputs; private int index; @@ -41,8 +45,9 @@ private static class MockRandom extends Random { @Override public void nextBytes(byte[] bytes) { + Objects.requireNonNull(bytes, "bytes"); if (index + bytes.length > outputs.length) { - throw new RuntimeException("not enough outputs given in MockRandom"); + throw new IllegalStateException("Not enough outputs given in MockRandom"); } System.arraycopy(outputs, index, bytes, 0, bytes.length); index += bytes.length; @@ -63,7 +68,7 @@ public void testNext(int cacheSize) { 0x00, 0x00, 0x00, 0x00, }); - AmortizedRandomBits arb = new AmortizedRandomBits(cacheSize, random); + CachedRandomBits arb = new CachedRandomBits(cacheSize, random); assertThrows(IllegalArgumentException.class, () -> arb.nextBits(0)); assertThrows(IllegalArgumentException.class, () -> arb.nextBits(33)); @@ -72,7 +77,7 @@ public void testNext(int cacheSize) { assertEquals(0x12, arb.nextBits(8)); assertEquals(0x1325, arb.nextBits(16)); - assertEquals((int) 0xabcdefff, arb.nextBits(32)); + assertEquals(0xabcdefff, arb.nextBits(32)); assertEquals(0x5, arb.nextBits(4)); assertEquals(0x1, arb.nextBits(1)); From 9b938913065ddaa5d36db3e8bcb3187bebcec843 Mon Sep 17 00:00:00 2001 From: Gary Gregory Date: Thu, 4 Jul 2024 08:34:02 -0400 Subject: [PATCH 5/5] Apply comments by aherbert in https://github.com/apache/commons-lang/commit/55a70c84f3658fd3ad3de72649b511ff21dc7175#r143834333 --- .../java/org/apache/commons/lang3/CachedRandomBits.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/apache/commons/lang3/CachedRandomBits.java b/src/main/java/org/apache/commons/lang3/CachedRandomBits.java index 6bdeaddd887..78d0b458c8a 100644 --- a/src/main/java/org/apache/commons/lang3/CachedRandomBits.java +++ b/src/main/java/org/apache/commons/lang3/CachedRandomBits.java @@ -81,7 +81,7 @@ public int nextBits(final int bits) { int result = 0; int generatedBits = 0; // number of generated bits up to now while (generatedBits < bits) { - if (bitIndex / 8 >= cache.length) { + if (bitIndex >> 3 >= cache.length) { // we exhausted the number of bits in the cache // this should only happen if the bitIndex is exactly matching the cache length assert bitIndex == cache.length * 8; @@ -90,9 +90,9 @@ public int nextBits(final int bits) { } // generatedBitsInIteration is the number of bits that we will generate // in this iteration of the while loop - int generatedBitsInIteration = Math.min(8 - (bitIndex % 8), bits - generatedBits); + int generatedBitsInIteration = Math.min(8 - (bitIndex & 0x7), bits - generatedBits); result = result << generatedBitsInIteration; - result |= (cache[bitIndex / 8] >> (bitIndex % 8)) & ((1 << generatedBitsInIteration) - 1); + result |= (cache[bitIndex >> 3] >> (bitIndex & 0x7)) & ((1 << generatedBitsInIteration) - 1); generatedBits += generatedBitsInIteration; bitIndex += generatedBitsInIteration; }