Skip to content

Commit 88c88e0

Browse files
committed
Use unbiased ranges in shuffle, move to separate function
1 parent 16b74d8 commit 88c88e0

File tree

2 files changed

+54
-63
lines changed

2 files changed

+54
-63
lines changed

src/lib.rs

+1-62
Original file line numberDiff line numberDiff line change
@@ -626,68 +626,7 @@ pub trait Rng: RngCore {
626626
/// println!("{:?}", y);
627627
/// ```
628628
fn shuffle<T>(&mut self, values: &mut [T]) {
629-
use distributions::range::WideningMultiply;
630-
// In theory this function is nothing more then:
631-
// ```
632-
// while i >= 2 {
633-
// i -= 1;
634-
// values.swap(i, self.gen_range(0, i + 1));`
635-
// }
636-
// ```
637-
//
638-
// `gen_range` samples completely unbiased from the range. It can be 4x
639-
// as fast if only we could sample with a tiny bias. Using a modulus to
640-
// reduce the range will always show a bias towards the lower numbers
641-
// in the range.
642-
//
643-
// But we use the widening multiply method. Here the bias is evenly
644-
// distributed over all numbers in the range. Also for every
645-
// iteration the range is 1 less then the previous iteration, and
646-
// the numbers that might have a bias are different. So besides the
647-
// biases being very tiny, they also partly cancel out each other.
648-
//
649-
// Said differently, we only have to worry about biases when a whole
650-
// group together shows a bias, as in the modulus method.
651-
652-
// We also optimize for slices of different length: huge slices with
653-
// more than 2^32 elements (usize indexes), large slices with more than
654-
// 2^16 elements, and medium to small slices. This allows us to make
655-
// better use of the bits generated by the RNG.
656-
let mut i = values.len();
657-
while i > core::u32::MAX as usize || i & 1 == 1 {
658-
// Invariant: elements with index >= i have been locked in place.
659-
i -= 1;
660-
// Lock element i in place, by swapping it with a random element in
661-
// the range [0, i] (inclusive).
662-
let (j, _) = self.gen::<u64>().wmul((i + 1) as u64);
663-
values.swap(i, j as usize);
664-
}
665-
let mut i = i as u32;
666-
667-
while i > core::u16::MAX as u32 || i & 2 == 2 {
668-
let r = self.gen::<u64>();
669-
let r1 = r as u32;
670-
let r2 = (r >> 32) as u32;
671-
672-
i -= 1;
673-
let (j, _) = r1.wmul(i + 1);
674-
values.swap(i as usize, j as usize);
675-
676-
i -= 1;
677-
let (j, _) = r2.wmul(i + 1);
678-
values.swap(i as usize, j as usize);
679-
}
680-
681-
let mut i = i as u16;
682-
while i >= 2 {
683-
let mut r = self.gen::<u64>();
684-
for _ in 0..4 {
685-
i -= 1;
686-
let (j, _) = (r as u16).wmul(i + 1);
687-
values.swap(i as usize, j as usize);
688-
r = r >> 16;
689-
}
690-
}
629+
seq::shuffle(self, values)
691630
}
692631
}
693632

src/seq.rs

+53-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010

1111
//! Functions for randomly accessing and sampling sequences.
1212
13-
use super::Rng;
13+
use Rng;
14+
use distributions::range::WideningMultiply;
1415

1516
// This crate is only enabled when either std or alloc is available.
1617
// BTreeMap is not as fast in tests, but better than nothing.
@@ -224,6 +225,57 @@ fn sample_indices_cache<R>(
224225
out
225226
}
226227

228+
pub(crate) fn shuffle<R, T>(rng: &mut R, values: &mut [T])
229+
where R: Rng + ?Sized {
230+
// In theory this function is nothing more then:
231+
// ```
232+
// while i > 1 {
233+
// // invariant: elements with index >= i have been locked in place.
234+
// i -= 1;
235+
// // lock element i in place.
236+
// values.swap(i, self.gen_range(0, i + 1));
237+
// }
238+
// ```
239+
//
240+
// We optimize for slices of different, because generating ranges is
241+
// faster for smaller integers. Less bits from the RNG are necessary,
242+
// and multiplies are faster.
243+
//
244+
// We don't switch exactly at the boundary between integer sizes,
245+
// because right below the integer boundary there is a very large zone
246+
// of values that have to be rejected to avoid bias, 25~50%.
247+
let mut i = values.len() as u64;
248+
while i > (1 << 31) {
249+
i -= 1;
250+
values.swap(i as usize, rng.gen_range(0, i + 1) as usize);
251+
}
252+
let mut i = i as u32;
253+
while i > (1 << 15) {
254+
i -= 1;
255+
values.swap(i as usize, rng.gen_range(0, i + 1) as usize);
256+
}
257+
let mut i = i as u16;
258+
while i > 4 {
259+
// Reimplement the range reduction here, because we can do better
260+
// than generating 32 bits and throwing away half of them.
261+
let mut value: u64 = rng.gen();
262+
for _ in 0..4 {
263+
let val = value as u16;
264+
value = value >> 16;
265+
let (hi, lo) = val.wmul(i);
266+
let zone = ::core::u16::MAX - (::core::u16::MAX - i + 1) % i;
267+
if lo <= zone {
268+
i -= 1;
269+
values.swap(i as usize, hi as usize);
270+
}
271+
}
272+
}
273+
while i > 1 {
274+
i -= 1;
275+
values.swap(i as usize, rng.gen_range(0, i + 1) as usize);
276+
}
277+
}
278+
227279
#[cfg(test)]
228280
mod test {
229281
use super::*;

0 commit comments

Comments
 (0)