From ce424372e6c0d7782bc60fcf19077542c357bcc4 Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Tue, 6 Dec 2022 00:05:54 +0000 Subject: [PATCH 01/11] Made shuffle and partial_shuffle faster --- benches/shuffle.rs | 77 ++++++++++++++++++++++++++++++++ src/seq/increasing_uniform.rs | 84 +++++++++++++++++++++++++++++++++++ src/seq/mod.rs | 62 +++++++++++++++++++------- 3 files changed, 206 insertions(+), 17 deletions(-) create mode 100644 benches/shuffle.rs create mode 100644 src/seq/increasing_uniform.rs diff --git a/benches/shuffle.rs b/benches/shuffle.rs new file mode 100644 index 0000000000..80011d5191 --- /dev/null +++ b/benches/shuffle.rs @@ -0,0 +1,77 @@ +// Copyright 2018-2022 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#![feature(test)] +#![allow(non_snake_case)] +#![feature(custom_inner_attributes)] +#![rustfmt::skip] +extern crate test; + +use test::Bencher; + +use core::mem::size_of; +use rand::prelude::*; +use rand::seq::*; + +// We force use of 32-bit RNG since seq code is optimised for use with 32-bit +// generators on all platforms. +use rand_chacha::ChaCha20Rng as CryptoRng; +use rand_pcg::Pcg32 as SmallRng; + +const RAND_BENCH_N: u64 = 1000; + +macro_rules! bench_seq_shuffle { + ($name:ident,$rng:ident, $fn:ident, $length:expr) => { + #[bench] + fn $name(b: &mut Bencher) { + let mut rng = $rng::from_rng(thread_rng()).unwrap(); + let x: &mut [usize] = &mut [1; $length]; + b.iter(|| { + x.$fn(&mut rng); + x[0] + }) + } + }; +} + +macro_rules! bench_partial_seq_shuffle { + ($name:ident,$rng:ident, $fn:ident, $length:expr, $amount:expr) => { + #[bench] + fn $name(b: &mut Bencher) { + let mut rng = $rng::from_rng(thread_rng()).unwrap(); + let x: &mut [usize] = &mut [1; $length]; + b.iter(|| { + let r = x.$fn(&mut rng, $amount); + r.0[0] + }) + } + }; +} + +bench_seq_shuffle!(bench_seq_shuffle_10000_crypto, CryptoRng, shuffle, 10000); +bench_seq_shuffle!(bench_seq_shuffle_10000_small_, SmallRng, shuffle, 10000); +bench_seq_shuffle!(bench_seq_shuffle_1000__crypto, CryptoRng, shuffle, 1000); +bench_seq_shuffle!(bench_seq_shuffle_1000__small_, SmallRng, shuffle, 1000); +bench_seq_shuffle!(bench_seq_shuffle_100__crypto, CryptoRng, shuffle, 100); +bench_seq_shuffle!(bench_seq_shuffle_100__small_, SmallRng, shuffle, 100); +bench_seq_shuffle!(bench_seq_shuffle_10____crypto, CryptoRng, shuffle, 10); +bench_seq_shuffle!(bench_seq_shuffle_10____small_, SmallRng, shuffle, 10); +bench_seq_shuffle!(bench_seq_shuffle__1____crypto, CryptoRng, shuffle, 1); +bench_seq_shuffle!(bench_seq_shuffle__1____small_, SmallRng, shuffle, 1); +bench_seq_shuffle!(bench_seq_shuffle__2____crypto, CryptoRng, shuffle, 2); +bench_seq_shuffle!(bench_seq_shuffle__2____small_, SmallRng, shuffle, 2); +bench_seq_shuffle!(bench_seq_shuffle__3____crypto, CryptoRng, shuffle, 3); +bench_seq_shuffle!(bench_seq_shuffle__3____small_, SmallRng, shuffle, 3); +bench_partial_seq_shuffle!(bench_partial_seq_shuffle_10000_crypto, CryptoRng, partial_shuffle, 10000, 5000); +bench_partial_seq_shuffle!(bench_partial_seq_shuffle_10000_small_, SmallRng, partial_shuffle, 10000, 5000); +bench_partial_seq_shuffle!(bench_partial_seq_shuffle_1000__crypto, CryptoRng, partial_shuffle, 1000, 500); +bench_partial_seq_shuffle!(bench_partial_seq_shuffle_1000__small_, SmallRng, partial_shuffle, 1000, 500); +bench_partial_seq_shuffle!(bench_partial_seq_shuffle_100__crypto, CryptoRng, partial_shuffle, 100, 50); +bench_partial_seq_shuffle!(bench_partial_seq_shuffle_100__small_, SmallRng, partial_shuffle, 100, 50); +bench_partial_seq_shuffle!(bench_partial_seq_shuffle_10____crypto, CryptoRng, partial_shuffle, 10, 5); +bench_partial_seq_shuffle!(bench_partial_seq_shuffle_10____small_, SmallRng, partial_shuffle, 10, 5); diff --git a/src/seq/increasing_uniform.rs b/src/seq/increasing_uniform.rs new file mode 100644 index 0000000000..08b941e77f --- /dev/null +++ b/src/seq/increasing_uniform.rs @@ -0,0 +1,84 @@ +use crate::{Rng, RngCore}; + +/// Similar to a Uniform distribution, but after returning a number in the range [0,n], n is increased by 1. +pub(crate) struct IncreasingUniform { + pub rng: R, + n: u32, + chunk: u32, //Chunk is a random number in [0, (n + 1) * (n + 2) *..* (n + chunk_remaining) ) + chunk_remaining: u8, +} + +impl IncreasingUniform { + /// Create a dice roller. + /// The next item returned will be a random number in the range [0,n] + pub fn new(rng: R, n: u32) -> Self { + let chunk_remaining = if n == 0 { 1 } else { 0 }; // If n = 0, the first number returned will always be 0, so we don't need to generate a random number + Self { + rng, + n, + chunk: 0, + chunk_remaining, + } + } + + /// Returns a number in [0,n] and increments n by 1. + /// Generates new random bits as needed + /// Panics if `n >= u32::MAX` + #[inline] + pub fn next_index(&mut self) -> usize { + let next_n = self.n + 1; + + let next_chunk_remaining = self.chunk_remaining.checked_sub(1).unwrap_or_else(|| { + //If the chunk is empty, generate a new chunk + let (bound, remaining) = calculate_bound_u32(next_n); + //bound = (n + 1) * (n + 2) *..* (n + remaining) + self.chunk = self.rng.gen_range(0..bound); + // Chunk is a random number in [0, (n + 1) * (n + 2) *..* (n + remaining) ) + + remaining - 1 + }); + + let result = if next_chunk_remaining == 0 { + //If the chunk is empty we asr + self.chunk as usize + // `chunk` is a random number in the range [0..n+1) + // Because `chunk_remaining` is about to be set to zero, we do not need to clear the chunk here + } else { + // `chunk` is a random number in a range that is a multiple of n+1 so r will be a random number in [0..n+1) + let r = self.chunk % next_n; + self.chunk /= next_n; + r as usize + }; + + self.chunk_remaining = next_chunk_remaining; + self.n = next_n; + result + } +} + +#[inline] +/// Calculates `bound`, `count` such that bound (m)*(m+1)*..*(m + remaining - 1) +fn calculate_bound_u32(m: u32) -> (u32, u8) { + #[inline] + const fn inner(m: u32) -> (u32, u8) { + let mut product = m; + let mut current = m + 1; + + loop { + if let Some(p) = u32::checked_mul(product, current) { + product = p; + current += 1; + } else { + let count = (current - m) as u8; //Maximum value of 13 for when min is 1 or 2 + return (product, count); + } + } + } + + const RESULT2: (u32, u8) = inner(2); + if m == 2 { + return RESULT2; //Making this value a constant instead of recalculating it gives a significant (~50%) performance boost for small shuffles + } + + inner(m) +} diff --git a/src/seq/mod.rs b/src/seq/mod.rs index e1286105c5..d7836d6138 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -29,6 +29,9 @@ mod coin_flipper; #[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] pub mod index; +#[cfg(feature = "alloc")] +mod increasing_uniform; + #[cfg(feature = "alloc")] use core::ops::Index; @@ -42,6 +45,7 @@ use crate::distributions::WeightedError; use crate::Rng; use self::coin_flipper::CoinFlipper; +use self::increasing_uniform::IncreasingUniform; /// Extension trait on slices, providing random mutation and sampling methods. /// @@ -620,10 +624,11 @@ impl SliceRandom for [T] { where R: Rng + ?Sized, { - for i in (1..self.len()).rev() { - // invariant: elements with index > i have been locked in place. - self.swap(i, gen_index(rng, i + 1)); + if self.len() <= 1 { + // There is no need to shuffle an empty or single element slice + return; } + self.partial_shuffle(rng, self.len()); } fn partial_shuffle( @@ -632,19 +637,42 @@ impl SliceRandom for [T] { where R: Rng + ?Sized, { - // This applies Durstenfeld's algorithm for the - // [Fisher–Yates shuffle](https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm) - // for an unbiased permutation, but exits early after choosing `amount` - // elements. - - let len = self.len(); - let end = if amount >= len { 0 } else { len - amount }; + let m = self.len().saturating_sub(amount); - for i in (end..len).rev() { - // invariant: elements with index > i have been locked in place. - self.swap(i, gen_index(rng, i + 1)); + // The algorithm below is based on Durstenfeld's algorithm for the + // [Fisher–Yates shuffle](https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm) + // for an unbiased permutation. + // It ensures that the last `amount` elements of the slice are randomly selected from the whole slice. + + // The loop invariant is that elements in the range [m,i) are randomly selected from the first i elements. + // This is true initially because the range is empty as i = m. + // The loop body swaps the element at i with a random one of the first i + 1 elements, then increments i. + // After the swap, the element at i will be randomly selected from the first i + 1 elements. + // Because of the invariant, before the swap, every element in the range [m,i) was randomly selected from the first i elements. + // Each of those elements has a 1 in i chance of being swapped with the element at i. + // Therefore, after the swap, the elements in the range [m,i) will be randomly selected from the first i + 1 elements. + // But the element at i was also randomly selected from the first i + 1 elements. + // So the elements in the range [m,i+1) are all randomly selected from the first i + 1 elements. + // So before we increment i, the elements in the range [m,i + 1) are randomly selected from the first i + 1 elements. + // Therefore, the loop invariant holds. + // When the loop exits, elements in the range [m,length] will be randomly selected from the whole slice. + + + //`IncreasingUniform::next_index()` is about twice as fast as `gen_index` but only works for 32 bit integers + //Therefore we must use the slow method if the slice is longer than that. + if self.len() < (u32::MAX as usize) { + let mut chooser = IncreasingUniform::new(rng, m as u32); + for i in m..self.len() { + let index = chooser.next_index(); + self.swap(i, index); + } + } else { + for i in m..self.len() { + let index = gen_index(rng, i + 1); + self.swap(i, index); + } } - let r = self.split_at_mut(end); + let r = self.split_at_mut(m); (r.1, r.0) } } @@ -765,11 +793,11 @@ mod test { let mut r = crate::test::rng(414); nums.shuffle(&mut r); - assert_eq!(nums, [9, 5, 3, 10, 7, 12, 8, 11, 6, 4, 0, 2, 1]); + assert_eq!(nums, [5, 11, 0, 8, 7, 12, 6, 4, 9, 3, 1, 2, 10]); nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; let res = nums.partial_shuffle(&mut r, 6); - assert_eq!(res.0, &mut [7, 4, 8, 6, 9, 3]); - assert_eq!(res.1, &mut [0, 1, 2, 12, 11, 5, 10]); + assert_eq!(res.0, &mut [7, 12, 6, 8, 1, 9]); + assert_eq!(res.1, &mut [0, 11, 2, 3, 4, 5, 10]); } #[derive(Clone)] From e5f2c3bfd206722b6c3103b591a73d9523741023 Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Wed, 4 Jan 2023 17:49:08 +0000 Subject: [PATCH 02/11] Use criterion benchmarks for shuffle --- Cargo.toml | 5 +++ benches/shuffle.rs | 100 +++++++++++++++++---------------------------- 2 files changed, 42 insertions(+), 63 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 19f7573851..ec0e4d7767 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,4 +79,9 @@ criterion = { version = "0.4" } [[bench]] name = "seq_choose" path = "benches/seq_choose.rs" +harness = false + +[[bench]] +name = "shuffle" +path = "benches/shuffle.rs" harness = false \ No newline at end of file diff --git a/benches/shuffle.rs b/benches/shuffle.rs index 80011d5191..8ebd65a769 100644 --- a/benches/shuffle.rs +++ b/benches/shuffle.rs @@ -5,73 +5,47 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. - -#![feature(test)] -#![allow(non_snake_case)] -#![feature(custom_inner_attributes)] -#![rustfmt::skip] -extern crate test; - -use test::Bencher; - -use core::mem::size_of; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::prelude::*; -use rand::seq::*; - -// We force use of 32-bit RNG since seq code is optimised for use with 32-bit -// generators on all platforms. -use rand_chacha::ChaCha20Rng as CryptoRng; -use rand_pcg::Pcg32 as SmallRng; - -const RAND_BENCH_N: u64 = 1000; - -macro_rules! bench_seq_shuffle { - ($name:ident,$rng:ident, $fn:ident, $length:expr) => { - #[bench] - fn $name(b: &mut Bencher) { - let mut rng = $rng::from_rng(thread_rng()).unwrap(); - let x: &mut [usize] = &mut [1; $length]; - b.iter(|| { - x.$fn(&mut rng); - x[0] - }) - } - }; +use rand::SeedableRng; + +criterion_group!( +name = benches; +config = Criterion::default(); +targets = bench +); +criterion_main!(benches); + +pub fn bench(c: &mut Criterion) { + bench_rng::(c, "ChaCha12"); + bench_rng::(c, "ChaCha20"); + bench_rng::(c, "Pcg32"); + bench_rng::(c, "Pcg64"); } -macro_rules! bench_partial_seq_shuffle { - ($name:ident,$rng:ident, $fn:ident, $length:expr, $amount:expr) => { - #[bench] - fn $name(b: &mut Bencher) { - let mut rng = $rng::from_rng(thread_rng()).unwrap(); - let x: &mut [usize] = &mut [1; $length]; +fn bench_rng(c: &mut Criterion, rng_name: &'static str) { + for length in [1, 2, 3, 10, 100, 1000, 10000].map(|x| black_box(x)) { + c.bench_function(format!("shuffle_{length}_{rng_name}").as_str(), |b| { + let mut rng = Rng::seed_from_u64(123); + let mut vec: Vec = (0..length).collect(); b.iter(|| { - let r = x.$fn(&mut rng, $amount); - r.0[0] + vec.shuffle(&mut rng); + vec[0] }) + }); + + if length >= 10 { + c.bench_function( + format!("partial_shuffle_{length}_{rng_name}").as_str(), + |b| { + let mut rng = Rng::seed_from_u64(123); + let mut vec: Vec = (0..length).collect(); + b.iter(|| { + vec.partial_shuffle(&mut rng, length / 2); + vec[0] + }) + }, + ); } - }; + } } - -bench_seq_shuffle!(bench_seq_shuffle_10000_crypto, CryptoRng, shuffle, 10000); -bench_seq_shuffle!(bench_seq_shuffle_10000_small_, SmallRng, shuffle, 10000); -bench_seq_shuffle!(bench_seq_shuffle_1000__crypto, CryptoRng, shuffle, 1000); -bench_seq_shuffle!(bench_seq_shuffle_1000__small_, SmallRng, shuffle, 1000); -bench_seq_shuffle!(bench_seq_shuffle_100__crypto, CryptoRng, shuffle, 100); -bench_seq_shuffle!(bench_seq_shuffle_100__small_, SmallRng, shuffle, 100); -bench_seq_shuffle!(bench_seq_shuffle_10____crypto, CryptoRng, shuffle, 10); -bench_seq_shuffle!(bench_seq_shuffle_10____small_, SmallRng, shuffle, 10); -bench_seq_shuffle!(bench_seq_shuffle__1____crypto, CryptoRng, shuffle, 1); -bench_seq_shuffle!(bench_seq_shuffle__1____small_, SmallRng, shuffle, 1); -bench_seq_shuffle!(bench_seq_shuffle__2____crypto, CryptoRng, shuffle, 2); -bench_seq_shuffle!(bench_seq_shuffle__2____small_, SmallRng, shuffle, 2); -bench_seq_shuffle!(bench_seq_shuffle__3____crypto, CryptoRng, shuffle, 3); -bench_seq_shuffle!(bench_seq_shuffle__3____small_, SmallRng, shuffle, 3); -bench_partial_seq_shuffle!(bench_partial_seq_shuffle_10000_crypto, CryptoRng, partial_shuffle, 10000, 5000); -bench_partial_seq_shuffle!(bench_partial_seq_shuffle_10000_small_, SmallRng, partial_shuffle, 10000, 5000); -bench_partial_seq_shuffle!(bench_partial_seq_shuffle_1000__crypto, CryptoRng, partial_shuffle, 1000, 500); -bench_partial_seq_shuffle!(bench_partial_seq_shuffle_1000__small_, SmallRng, partial_shuffle, 1000, 500); -bench_partial_seq_shuffle!(bench_partial_seq_shuffle_100__crypto, CryptoRng, partial_shuffle, 100, 50); -bench_partial_seq_shuffle!(bench_partial_seq_shuffle_100__small_, SmallRng, partial_shuffle, 100, 50); -bench_partial_seq_shuffle!(bench_partial_seq_shuffle_10____crypto, CryptoRng, partial_shuffle, 10, 5); -bench_partial_seq_shuffle!(bench_partial_seq_shuffle_10____small_, SmallRng, partial_shuffle, 10, 5); From ae53b3cd74f250bebc772aba45a8b601773cdd1b Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Wed, 4 Jan 2023 17:52:43 +0000 Subject: [PATCH 03/11] Added a note about RNG word size --- src/seq/increasing_uniform.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/seq/increasing_uniform.rs b/src/seq/increasing_uniform.rs index 08b941e77f..880d90cd87 100644 --- a/src/seq/increasing_uniform.rs +++ b/src/seq/increasing_uniform.rs @@ -4,6 +4,7 @@ use crate::{Rng, RngCore}; pub(crate) struct IncreasingUniform { pub rng: R, n: u32, + //TODO(opt): this should depend on RNG word size chunk: u32, //Chunk is a random number in [0, (n + 1) * (n + 2) *..* (n + chunk_remaining) ) chunk_remaining: u8, } From 485d015f71a4758c6727d3c9b3d54e1baf1326ec Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Fri, 6 Jan 2023 14:50:21 +0000 Subject: [PATCH 04/11] Tidied comments --- src/seq/increasing_uniform.rs | 34 +++++++++++++++++++++------------- src/seq/mod.rs | 22 +++++----------------- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/src/seq/increasing_uniform.rs b/src/seq/increasing_uniform.rs index 880d90cd87..c26385b411 100644 --- a/src/seq/increasing_uniform.rs +++ b/src/seq/increasing_uniform.rs @@ -1,11 +1,12 @@ use crate::{Rng, RngCore}; -/// Similar to a Uniform distribution, but after returning a number in the range [0,n], n is increased by 1. +/// Similar to a Uniform distribution, +/// but after returning a number in the range [0,n], n is increased by 1. pub(crate) struct IncreasingUniform { pub rng: R, n: u32, - //TODO(opt): this should depend on RNG word size - chunk: u32, //Chunk is a random number in [0, (n + 1) * (n + 2) *..* (n + chunk_remaining) ) + // Chunk is a random number in [0, (n + 1) * (n + 2) *..* (n + chunk_remaining) ) + chunk: u32, chunk_remaining: u8, } @@ -13,7 +14,9 @@ impl IncreasingUniform { /// Create a dice roller. /// The next item returned will be a random number in the range [0,n] pub fn new(rng: R, n: u32) -> Self { - let chunk_remaining = if n == 0 { 1 } else { 0 }; // If n = 0, the first number returned will always be 0, so we don't need to generate a random number + // If n = 0, the first number returned will always be 0 + // so we don't need to generate a random number + let chunk_remaining = if n == 0 { 1 } else { 0 }; Self { rng, n, @@ -30,22 +33,24 @@ impl IncreasingUniform { let next_n = self.n + 1; let next_chunk_remaining = self.chunk_remaining.checked_sub(1).unwrap_or_else(|| { - //If the chunk is empty, generate a new chunk + // If the chunk is empty, generate a new chunk let (bound, remaining) = calculate_bound_u32(next_n); - //bound = (n + 1) * (n + 2) *..* (n + remaining) + // bound = (n + 1) * (n + 2) *..* (n + remaining) self.chunk = self.rng.gen_range(0..bound); - // Chunk is a random number in [0, (n + 1) * (n + 2) *..* (n + remaining) ) + // Chunk is a random number in + // [0, (n + 1) * (n + 2) *..* (n + remaining) ) remaining - 1 }); let result = if next_chunk_remaining == 0 { - //If the chunk is empty we asr - self.chunk as usize // `chunk` is a random number in the range [0..n+1) - // Because `chunk_remaining` is about to be set to zero, we do not need to clear the chunk here + // Because `chunk_remaining` is about to be set to zero + // we do not need to clear the chunk here + self.chunk as usize } else { - // `chunk` is a random number in a range that is a multiple of n+1 so r will be a random number in [0..n+1) + // `chunk` is a random number in a range that is a multiple of n+1 + // so r will be a random number in [0..n+1) let r = self.chunk % next_n; self.chunk /= next_n; r as usize @@ -70,7 +75,8 @@ fn calculate_bound_u32(m: u32) -> (u32, u8) { product = p; current += 1; } else { - let count = (current - m) as u8; //Maximum value of 13 for when min is 1 or 2 + // Count has a maximum value of 13 for when min is 1 or 2 + let count = (current - m) as u8; return (product, count); } } @@ -78,7 +84,9 @@ fn calculate_bound_u32(m: u32) -> (u32, u8) { const RESULT2: (u32, u8) = inner(2); if m == 2 { - return RESULT2; //Making this value a constant instead of recalculating it gives a significant (~50%) performance boost for small shuffles + // Making this value a constant instead of recalculating it + // gives a significant (~50%) performance boost for small shuffles + return RESULT2; } inner(m) diff --git a/src/seq/mod.rs b/src/seq/mod.rs index d7836d6138..cf71f35569 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -642,24 +642,12 @@ impl SliceRandom for [T] { // The algorithm below is based on Durstenfeld's algorithm for the // [Fisher–Yates shuffle](https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm) // for an unbiased permutation. - // It ensures that the last `amount` elements of the slice are randomly selected from the whole slice. - - // The loop invariant is that elements in the range [m,i) are randomly selected from the first i elements. - // This is true initially because the range is empty as i = m. - // The loop body swaps the element at i with a random one of the first i + 1 elements, then increments i. - // After the swap, the element at i will be randomly selected from the first i + 1 elements. - // Because of the invariant, before the swap, every element in the range [m,i) was randomly selected from the first i elements. - // Each of those elements has a 1 in i chance of being swapped with the element at i. - // Therefore, after the swap, the elements in the range [m,i) will be randomly selected from the first i + 1 elements. - // But the element at i was also randomly selected from the first i + 1 elements. - // So the elements in the range [m,i+1) are all randomly selected from the first i + 1 elements. - // So before we increment i, the elements in the range [m,i + 1) are randomly selected from the first i + 1 elements. - // Therefore, the loop invariant holds. - // When the loop exits, elements in the range [m,length] will be randomly selected from the whole slice. + // It ensures that the last `amount` elements of the slice + // are randomly selected from the whole slice. - - //`IncreasingUniform::next_index()` is about twice as fast as `gen_index` but only works for 32 bit integers - //Therefore we must use the slow method if the slice is longer than that. + //`IncreasingUniform::next_index()` is faster than `gen_index` + //but only works for 32 bit integers + //So we must use the slow method if the slice is longer than that. if self.len() < (u32::MAX as usize) { let mut chooser = IncreasingUniform::new(rng, m as u32); for i in m..self.len() { From c7c52a56daee1c8df22058ae81c09e65a669f02d Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Fri, 6 Jan 2023 14:51:37 +0000 Subject: [PATCH 05/11] Added a debug_assert --- src/seq/increasing_uniform.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/seq/increasing_uniform.rs b/src/seq/increasing_uniform.rs index c26385b411..160a181f6c 100644 --- a/src/seq/increasing_uniform.rs +++ b/src/seq/increasing_uniform.rs @@ -67,6 +67,7 @@ impl IncreasingUniform { fn calculate_bound_u32(m: u32) -> (u32, u8) { #[inline] const fn inner(m: u32) -> (u32, u8) { + debug_assert!(m > 0); let mut product = m; let mut current = m + 1; From d2e939f7ccdfac426010f6469a9f7d271cd28ca5 Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Fri, 6 Jan 2023 14:57:06 +0000 Subject: [PATCH 06/11] Added a comment re possible further optimization --- src/seq/increasing_uniform.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/seq/increasing_uniform.rs b/src/seq/increasing_uniform.rs index 160a181f6c..d4b6f1e0d1 100644 --- a/src/seq/increasing_uniform.rs +++ b/src/seq/increasing_uniform.rs @@ -32,6 +32,12 @@ impl IncreasingUniform { pub fn next_index(&mut self) -> usize { let next_n = self.n + 1; + // There's room for further optimisation here: + // gen_range uses rejection sampling (or other method; see #1196) to avoid bias. + // When the initial sample is biased for range 0..bound + // it may still be viable to use for a smaller bound + // (especially if small biases are considered acceptable). + let next_chunk_remaining = self.chunk_remaining.checked_sub(1).unwrap_or_else(|| { // If the chunk is empty, generate a new chunk let (bound, remaining) = calculate_bound_u32(next_n); From 06820c2396696d610b7274c525246cc8ce53d14e Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Fri, 6 Jan 2023 15:58:59 +0000 Subject: [PATCH 07/11] Added and updated copyright notices --- benches/seq_choose.rs | 2 +- benches/shuffle.rs | 2 +- src/seq/coin_flipper.rs | 8 ++++++++ src/seq/increasing_uniform.rs | 8 ++++++++ src/seq/mod.rs | 2 +- 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/benches/seq_choose.rs b/benches/seq_choose.rs index 44b4bdf972..2c34d77ced 100644 --- a/benches/seq_choose.rs +++ b/benches/seq_choose.rs @@ -1,4 +1,4 @@ -// Copyright 2018-2022 Developers of the Rand project. +// Copyright 2018-2023 Developers of the Rand project. // // Licensed under the Apache License, Version 2.0 or the MIT license diff --git a/benches/shuffle.rs b/benches/shuffle.rs index 8ebd65a769..47efcbd516 100644 --- a/benches/shuffle.rs +++ b/benches/shuffle.rs @@ -1,4 +1,4 @@ -// Copyright 2018-2022 Developers of the Rand project. +// Copyright 2018-2023 Developers of the Rand project. // // Licensed under the Apache License, Version 2.0 or the MIT license diff --git a/src/seq/coin_flipper.rs b/src/seq/coin_flipper.rs index 77c18ded43..05f18d71b2 100644 --- a/src/seq/coin_flipper.rs +++ b/src/seq/coin_flipper.rs @@ -1,3 +1,11 @@ +// Copyright 2018-2023 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + use crate::RngCore; pub(crate) struct CoinFlipper { diff --git a/src/seq/increasing_uniform.rs b/src/seq/increasing_uniform.rs index d4b6f1e0d1..0abfce15b4 100644 --- a/src/seq/increasing_uniform.rs +++ b/src/seq/increasing_uniform.rs @@ -1,3 +1,11 @@ +// Copyright 2018-2023 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + use crate::{Rng, RngCore}; /// Similar to a Uniform distribution, diff --git a/src/seq/mod.rs b/src/seq/mod.rs index cf71f35569..42f75b9f15 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -1,4 +1,4 @@ -// Copyright 2018 Developers of the Rand project. +// Copyright 2018-2023 Developers of the Rand project. // // Licensed under the Apache License, Version 2.0 or the MIT license From f7b4a99c457ecfa1f696bca8f01e4b42260ff985 Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Fri, 6 Jan 2023 16:46:37 +0000 Subject: [PATCH 08/11] Revert cfg mistake --- src/seq/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/seq/mod.rs b/src/seq/mod.rs index 42f75b9f15..dddec36f83 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -29,7 +29,6 @@ mod coin_flipper; #[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] pub mod index; -#[cfg(feature = "alloc")] mod increasing_uniform; #[cfg(feature = "alloc")] @@ -1070,7 +1069,7 @@ mod test { *slice.last_mut().unwrap() = last_val; } let mut counts = [0i32; 24]; - for _ in 0..10000 { + for _ in 0..100000 { let mut arr: [usize; 4] = [0, 1, 2, 3]; arr.shuffle(&mut r); let mut permutation = 0usize; From fbd71146f9190f9ad97955b76862a24e0ecc72a2 Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Fri, 6 Jan 2023 17:05:40 +0000 Subject: [PATCH 09/11] Reverted change to mod.rs --- src/seq/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/seq/mod.rs b/src/seq/mod.rs index dddec36f83..d9b38e920d 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -1069,7 +1069,7 @@ mod test { *slice.last_mut().unwrap() = last_val; } let mut counts = [0i32; 24]; - for _ in 0..100000 { + for _ in 0..10000 { let mut arr: [usize; 4] = [0, 1, 2, 3]; arr.shuffle(&mut r); let mut permutation = 0usize; From 7bff8288e1758d668eaf5d3bbd0d2613afb66001 Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Fri, 6 Jan 2023 17:11:31 +0000 Subject: [PATCH 10/11] Removed ChaCha20 benches from shuffle --- benches/shuffle.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/benches/shuffle.rs b/benches/shuffle.rs index 47efcbd516..3d6878219f 100644 --- a/benches/shuffle.rs +++ b/benches/shuffle.rs @@ -18,7 +18,6 @@ criterion_main!(benches); pub fn bench(c: &mut Criterion) { bench_rng::(c, "ChaCha12"); - bench_rng::(c, "ChaCha20"); bench_rng::(c, "Pcg32"); bench_rng::(c, "Pcg64"); } From bf380972f18d751acfb0e995cb1ce3717dd9430d Mon Sep 17 00:00:00 2001 From: Mark Wainwright Date: Fri, 6 Jan 2023 17:35:43 +0000 Subject: [PATCH 11/11] moved debug_assert out of a const fn --- src/seq/increasing_uniform.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/seq/increasing_uniform.rs b/src/seq/increasing_uniform.rs index 0abfce15b4..3208c656fb 100644 --- a/src/seq/increasing_uniform.rs +++ b/src/seq/increasing_uniform.rs @@ -79,9 +79,9 @@ impl IncreasingUniform { #[inline] /// Calculates `bound`, `count` such that bound (m)*(m+1)*..*(m + remaining - 1) fn calculate_bound_u32(m: u32) -> (u32, u8) { + debug_assert!(m > 0); #[inline] const fn inner(m: u32) -> (u32, u8) { - debug_assert!(m > 0); let mut product = m; let mut current = m + 1;