Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reimplement RandomStringUtils on top of SecureRandom#getInstanceStrong() #1235

Merged
merged 6 commits into from
Jul 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 101 additions & 0 deletions src/main/java/org/apache/commons/lang3/CachedRandomBits.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.lang3;

import java.util.Objects;
import java.util.Random;

/**
* Generates random integers of specific bit length.
*
* <p>
* It is more efficient than calling Random.nextInt(1 << nbBits). It uses a cache of cacheSize random bytes that it replenishes when it gets empty. This is
* especially beneficial for SecureRandom Drbg implementations, which incur a constant cost at each randomness generation.
* </p>
*
* <p>
* Used internally by RandomStringUtils.
* </p>
*
* <p>
* #NotThreadSafe#
* </p>
*/
final class CachedRandomBits {

private final Random random;

private final byte[] cache;

/**
* Index of the next bit in the cache to be used.
*
* <ul>
* <li>bitIndex=0 means the cache is fully random and none of the bits have been used yet.</li>
* <li>bitIndex=1 means that only the LSB of cache[0] has been used and all other bits can be used.</li>
* <li>bitIndex=8 means that only the 8 bits of cache[0] has been used.</li>
* </ul>
*/
private int bitIndex;

/**
* Creates a new instance.
*
* @param cacheSize number of bytes cached (only affects performance)
* @param random random source
*/
CachedRandomBits(final int cacheSize, final Random random) {
if (cacheSize <= 0) {
throw new IllegalArgumentException("cacheSize must be positive");
}
this.cache = new byte[cacheSize];
this.random = Objects.requireNonNull(random, "random");
this.random.nextBytes(this.cache);
this.bitIndex = 0;
}

/**
* Generates a random integer with the specified number of bits.
*
* @param bits number of bits to generate, MUST be between 1 and 32
* @return random integer with {@code bits} bits
*/
public int nextBits(final int bits) {
if (bits > 32 || bits <= 0) {
throw new IllegalArgumentException("number of bits must be between 1 and 32");
}
int result = 0;
int generatedBits = 0; // number of generated bits up to now
while (generatedBits < bits) {
if (bitIndex >> 3 >= cache.length) {
// we exhausted the number of bits in the cache
// this should only happen if the bitIndex is exactly matching the cache length
assert bitIndex == cache.length * 8;
random.nextBytes(cache);
bitIndex = 0;
}
// generatedBitsInIteration is the number of bits that we will generate
// in this iteration of the while loop
int generatedBitsInIteration = Math.min(8 - (bitIndex & 0x7), bits - generatedBits);
result = result << generatedBitsInIteration;
result |= (cache[bitIndex >> 3] >> (bitIndex & 0x7)) & ((1 << generatedBitsInIteration) - 1);
generatedBits += generatedBitsInIteration;
bitIndex += generatedBitsInIteration;
}
return result;
}
}
127 changes: 99 additions & 28 deletions src/main/java/org/apache/commons/lang3/RandomStringUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,44 +16,64 @@
*/
package org.apache.commons.lang3;

import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;

import org.apache.commons.lang3.exception.UncheckedException;

/**
* Generates random {@link String}s.
* <p>
* Starting in version 3.15.0, this classes uses {@link SecureRandom#getInstanceStrong()}.
* </p>
* <p>
* Before version 3.15.0, this classes used {@link ThreadLocalRandom#current()}, which was NOT cryptographically secure.
* </p>
* <p>
* RandomStringUtils is intended for simple use cases. For more advanced use cases consider using Apache Commons Text's
* <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/RandomStringGenerator.html"> RandomStringGenerator</a>
* instead.
* </p>
* <p>
* The Apache Commons project provides <a href="https://commons.apache.org/proper/commons-rng/">Commons RNG</a> dedicated to pseudo-random number generation,
* that may be a better choice for applications with more stringent requirements (performance and/or correctness).
* </p>
* <p>
* Note that <em>private high surrogate</em> characters are ignored. These are Unicode characters that fall between the values 56192 (db80) and 56319 (dbff) as
* we don't know how to handle them. High and low surrogates are correctly dealt with - that is if a high surrogate is randomly chosen, 55296 (d800) to 56191
* (db7f) then it is followed by a low surrogate. If a low surrogate is chosen, 56320 (dc00) to 57343 (dfff) then it is placed after a randomly chosen high
* surrogate.
* </p>
* <p>
* #ThreadSafe#
* </p>
*
* <p><b>Caveat: Instances of {@link Random}, upon which the implementation of this
* class relies, are not cryptographically secure.</b></p>
*
* <p>RandomStringUtils is intended for simple use cases. For more advanced
* use cases consider using Apache Commons Text's
* <a href="https://commons.apache.org/proper/commons-text/javadocs/api-release/org/apache/commons/text/RandomStringGenerator.html">
* RandomStringGenerator</a> instead.</p>
*
* <p>The Apache Commons project provides
* <a href="https://commons.apache.org/proper/commons-rng/">Commons RNG</a> dedicated to pseudo-random number generation, that may be
* a better choice for applications with more stringent requirements
* (performance and/or correctness).</p>
*
* <p>Note that <em>private high surrogate</em> characters are ignored.
* These are Unicode characters that fall between the values 56192 (db80)
* and 56319 (dbff) as we don't know how to handle them.
* High and low surrogates are correctly dealt with - that is if a
* high surrogate is randomly chosen, 55296 (d800) to 56191 (db7f)
* then it is followed by a low surrogate. If a low surrogate is chosen,
* 56320 (dc00) to 57343 (dfff) then it is placed after a randomly
* chosen high surrogate.</p>
*
* <p>#ThreadSafe#</p>
* @since 1.0
*/
public class RandomStringUtils {

private static ThreadLocalRandom random() {
return ThreadLocalRandom.current();
private static final ThreadLocal<SecureRandom> RANDOM = ThreadLocal.withInitial(() -> {
try {
return SecureRandom.getInstanceStrong();
} catch (NoSuchAlgorithmException e) {
throw new UncheckedException(e);
}
});

private static SecureRandom random() {
return RANDOM.get();
}

// Random
private static final char[] ALPHANUMERICAL_CHARS = {
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'
};

/**
* Creates a random string whose length is the number of characters
* specified.
Expand Down Expand Up @@ -214,6 +234,42 @@ public static String random(int count, int start, int end, final boolean letters
throw new IllegalArgumentException("Parameter end (" + end + ") must be greater than start (" + start + ")");
}

if (end > Character.MAX_CODE_POINT) {
// Technically, it should be `Character.MAX_CODE_POINT+1` as `end` is excluded
// But the character `Character.MAX_CODE_POINT` is private use, so it would anyway be excluded
end = Character.MAX_CODE_POINT;
}

// Optimize generation of full alphanumerical characters
// Normally, we would need to pick a 7-bit integer, since gap = 'z' - '0' + 1 = 75 > 64
// In turn, this would make us reject the sampling with probability 1 - 62 / 2^7 > 1 / 2
// Instead we can pick directly from the right set of 62 characters, which requires
// picking a 6-bit integer and only rejecting with probability 2 / 64 = 1 / 32
if (chars == null && letters && numbers && start <= '0' && end >= 'z' + 1) {
return random(count, 0, 0, false, false, ALPHANUMERICAL_CHARS, random);
}

// Optimize start and end when filtering by letters and/or numbers:
// The range provided may be too large since we filter anyway afterward.
// Note the use of Math.min/max (as opposed to setting start to '0' for example),
// since it is possible the range start/end excludes some of the letters/numbers,
// e.g., it is possible that start already is '1' when numbers = true, and start
// needs to stay equal to '1' in that case.
if (chars == null) {
if (letters && numbers) {
start = Math.max('0', start);
end = Math.min('z' + 1, end);
} else if (numbers) {
// just numbers, no letters
start = Math.max('0', start);
end = Math.min('9' + 1, end);
} else if (letters) {
// just letters, no numbers
start = Math.max('A', start);
end = Math.min('z' + 1, end);
}
}

final int zeroDigitAscii = 48;
final int firstLetterAscii = 65;

Expand All @@ -225,11 +281,26 @@ public static String random(int count, int start, int end, final boolean letters

final StringBuilder builder = new StringBuilder(count);
final int gap = end - start;
final int gapBits = Integer.SIZE - Integer.numberOfLeadingZeros(gap);
// The size of the cache we use is an heuristic:
// about twice the number of bytes required if no rejection
// Ideally the cache size depends on multiple factor, including the cost of generating x bytes
// of randomness as well as the probability of rejection. It is however not easy to know
// those values programmatically for the general case.
final CachedRandomBits arb = new CachedRandomBits((count * gapBits + 3) / 5 + 10, random);

while (count-- != 0) {
// Generate a random value between start (included) and end (excluded)
final int randomValue = arb.nextBits(gapBits) + start;
// Rejection sampling if value too large
if (randomValue >= end) {
count++;
continue;
}

final int codePoint;
if (chars == null) {
codePoint = random.nextInt(gap) + start;
codePoint = randomValue;

switch (Character.getType(codePoint)) {
case Character.UNASSIGNED:
Expand All @@ -240,7 +311,7 @@ public static String random(int count, int start, int end, final boolean letters
}

} else {
codePoint = chars[random.nextInt(gap) + start];
codePoint = chars[randomValue];
}

final int numberOfChars = Character.charCount(codePoint);
Expand Down
96 changes: 96 additions & 0 deletions src/test/java/org/apache/commons/lang3/CachedRandomBitsTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.lang3;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import java.util.Objects;
import java.util.Random;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

/**
* Tests {@link CachedRandomBits}.
*/
public class CachedRandomBitsTest {

/** MockRandom mocks a Random class nextBytes to use a specific list of outputs */
private static class MockRandom extends Random {

private static final long serialVersionUID = 1L;
private final byte[] outputs;
private int index;

MockRandom(final byte[] outputs) {
super();
this.outputs = outputs.clone();
this.index = 0;
}

@Override
public void nextBytes(byte[] bytes) {
Objects.requireNonNull(bytes, "bytes");
if (index + bytes.length > outputs.length) {
throw new IllegalStateException("Not enough outputs given in MockRandom");
}
System.arraycopy(outputs, index, bytes, 0, bytes.length);
index += bytes.length;
}
}

@ParameterizedTest
@ValueSource(ints = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 32})
public void testNext(int cacheSize) {
MockRandom random = new MockRandom(new byte[]{
0x11, 0x12, 0x13, 0x25,
(byte) 0xab, (byte) 0xcd, (byte) 0xef, (byte) 0xff,
0x55, 0x44, 0x12, 0x34,
0x56, 0x78, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
});

CachedRandomBits arb = new CachedRandomBits(cacheSize, random);

assertThrows(IllegalArgumentException.class, () -> arb.nextBits(0));
assertThrows(IllegalArgumentException.class, () -> arb.nextBits(33));

assertEquals(0x11, arb.nextBits(8));
assertEquals(0x12, arb.nextBits(8));
assertEquals(0x1325, arb.nextBits(16));

assertEquals(0xabcdefff, arb.nextBits(32));

assertEquals(0x5, arb.nextBits(4));
assertEquals(0x1, arb.nextBits(1));
assertEquals(0x0, arb.nextBits(1));
assertEquals(0x1, arb.nextBits(2));

assertEquals(0x4, arb.nextBits(6));

assertEquals(0x40000000 | (0x12345600 >> 2) | 0x38, arb.nextBits(32));

assertEquals(1, arb.nextBits(1));
assertEquals(0, arb.nextBits(1));
assertEquals(0, arb.nextBits(9));
assertEquals(0, arb.nextBits(31));
}
}
Loading