From c8fe4db671c68528b3e935cbf0a4021cc74a1cd5 Mon Sep 17 00:00:00 2001 From: Narthana Epa Date: Tue, 9 Jan 2024 14:23:31 +0530 Subject: [PATCH 1/2] Add explanatory comments to test --- src/passphrase.rs | 61 ++++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 22 deletions(-) diff --git a/src/passphrase.rs b/src/passphrase.rs index 8058a95..e44cf9f 100644 --- a/src/passphrase.rs +++ b/src/passphrase.rs @@ -28,44 +28,61 @@ pub fn new( mod test { #[test] + // Uses [Pearson's chi-squared test](https://en.wikipedia.org/wiki/Chi-squared_test#Pearson's_chi-squared_test) + // to test that the passphrases are uniformly distributed. fn chi_squared() { use crate::{passphrase, words}; use statrs::distribution::{ChiSquared, ContinuousCDF}; use std::collections::HashMap; - let n = 4; - let n_fact = 24; - // this test file has n = 4 words, which can have 24 permutations + // This test file has W = 4 words, which can have 24 permutations + const W: usize = 4; + const W_FACTORIAL: usize = 24; + const N: usize = 1_200_000; // number of samples + let words = words::list(Some("src/fixtures/test")).unwrap(); - let trials = 1_200_000; let mut rng = rand::thread_rng(); - - let mut histogram: HashMap = HashMap::new(); - (1..trials).for_each(|_| { + let histogram = (1..N).fold(HashMap::new(), |mut acc, _| { let mut words = words.clone(); - let s = passphrase::new(&mut rng, &mut words, n, " "); - *histogram.entry(s).or_insert(0) += 1; + let s = passphrase::new(&mut rng, &mut words, W, " "); + *acc.entry(s).or_insert(0) += 1 as usize; + acc }); - assert_eq!(histogram.len(), n_fact); + // There should be at most W! different passphrases. If, by chance, some of them are not + // generated, then the chi-squared test is highly unlikely to conclude that they are + // uniformly distributed. 
+ assert_eq!( + histogram.len(), + W_FACTORIAL, + "expected there to be {} different passphrases, but there were {}", + W_FACTORIAL, + histogram.len(), + ); - let expected_frequency = trials as f64 / n_fact as f64; + let expected_frequency = N as f64 / W_FACTORIAL as f64; let chi_squared_stat: f64 = histogram - .iter() - .map(|(_, v)| (*v as f64 - expected_frequency).powi(2) / expected_frequency) + .values() + .map(|v| (*v as f64 - expected_frequency).powi(2) / expected_frequency) .sum(); - // degrees of freedom = (number of rows - 1) * (number of columns - 1) - let df = ((2 - 1) * (24 - 1)) as f64; - let dist = ChiSquared::new(df).unwrap(); - let p = 1.0 - dist.cdf(chi_squared_stat); + // Since the number in any permutation is determined by the number in all the others, + // degrees of freedom = number of permutations - 1 + const DF: f64 = (W_FACTORIAL - 1) as f64; + let dist = ChiSquared::new(DF).unwrap(); - eprintln!("χ^2: {}", chi_squared_stat); - eprintln!("p: {}", p); + // The p-value is the area under the chi-squared pdf to the right of the chi_squared_stat + let p = 1.0 - dist.cdf(chi_squared_stat); - // the p-value should be greater than 0.05 so that we can't reject the null hypothesis - // if we can reject the null hypothesis, then the passphrase generator is not uniform - assert_eq!(p > 0.05, true); + // The p-value should be greater than 0.05 so that we can't reject the null hypothesis that + // the values are from a uniform distribution. + // If we can reject the null hypothesis, then the passphrase generator may not be uniform. + assert!( + p > 0.05, + "passphrase may not be uniformly random. 
(p = {} <= 0.05, χ^2 = {}).", + p, + chi_squared_stat, + ); } } From 16a1b3eb78b9177cd756889b006bdb05e1a00580 Mon Sep 17 00:00:00 2001 From: Narthana Epa Date: Tue, 9 Jan 2024 17:07:48 +0530 Subject: [PATCH 2/2] Modify test to be parallel with rayon and increase samples by factor of 10 --- Cargo.lock | 52 +++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + src/passphrase.rs | 41 +++++++++++++++++++++++-------------- 3 files changed, 79 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7352184..b1498db 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -132,6 +132,37 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + [[package]] name = "eyre" version = "0.6.11" @@ -350,6 +381,26 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" +[[package]] +name = "rayon" +version = "1.8.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "regex" version = "1.10.2" @@ -387,6 +438,7 @@ dependencies = [ "eyre", "lazy_static", "rand", + "rayon", "regex", "statrs", ] diff --git a/Cargo.toml b/Cargo.toml index 407968e..61709e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,4 +13,5 @@ rand = "0.8.5" regex = "1.10.2" [dev-dependencies] +rayon = "1.8.0" statrs = "0.16.0" diff --git a/src/passphrase.rs b/src/passphrase.rs index e44cf9f..26b8344 100644 --- a/src/passphrase.rs +++ b/src/passphrase.rs @@ -32,34 +32,45 @@ mod test { // to test that the passphrases are uniformly distributed. 
fn chi_squared() { use crate::{passphrase, words}; + use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; use statrs::distribution::{ChiSquared, ContinuousCDF}; use std::collections::HashMap; // This test file has W = 4 words, which can have 24 permutations const W: usize = 4; const W_FACTORIAL: usize = 24; - const N: usize = 1_200_000; // number of samples + const N: usize = 12_000_000; // number of samples let words = words::list(Some("src/fixtures/test")).unwrap(); - let mut rng = rand::thread_rng(); - let histogram = (1..N).fold(HashMap::new(), |mut acc, _| { - let mut words = words.clone(); - let s = passphrase::new(&mut rng, &mut words, W, " "); - *acc.entry(s).or_insert(0) += 1 as usize; - acc - }); + let histogram = Vec::from_iter(0..N) + .par_iter() + .fold_chunks( + N / std::thread::available_parallelism().unwrap(), + || HashMap::new(), + |mut acc, _| { + let mut rng = rand::thread_rng(); + let mut words = words.clone(); + let s = passphrase::new(&mut rng, &mut words, W, " "); + *acc.entry(s).or_insert(0) += 1 as usize; + acc + }, + ) + .collect::<Vec<HashMap<_, usize>>>() + .iter() + .fold(HashMap::new(), |mut acc, h| { + h.iter().for_each(|(k, v)| { + *acc.entry(k.to_owned()).or_insert(0) += v; + }); + acc + }); + + assert_eq!(histogram.values().sum::<usize>(), N, "missing samples"); // There should be at most W! different passphrases. If, by chance, some of them are not // generated, then the chi-squared test is highly unlikely to conclude that they are // uniformly distributed. - assert_eq!( - histogram.len(), - W_FACTORIAL, - "expected there to be {} different passphrases, but there were {}", - W_FACTORIAL, - histogram.len(), - ); + assert_eq!(W_FACTORIAL, histogram.len(), "missing a permutation"); let expected_frequency = N as f64 / W_FACTORIAL as f64; let chi_squared_stat: f64 = histogram