Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compare sampled normal distribution to PDF #1121

Merged
merged 14 commits into from
May 12, 2021
28 changes: 14 additions & 14 deletions benches/seq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ fn seq_shuffle_100(b: &mut Bencher) {
fn seq_slice_choose_1_of_1000(b: &mut Bencher) {
let mut rng = SmallRng::from_rng(thread_rng()).unwrap();
let x: &mut [usize] = &mut [1; 1000];
for i in 0..1000 {
x[i] = i;
for (i, r) in x.iter_mut().enumerate() {
*r = i;
}
b.iter(|| {
let mut s = 0;
Expand Down Expand Up @@ -78,8 +78,8 @@ seq_slice_choose_multiple!(seq_slice_choose_multiple_90_of_100, 90, 100);
fn seq_iter_choose_from_1000(b: &mut Bencher) {
let mut rng = SmallRng::from_rng(thread_rng()).unwrap();
let x: &mut [usize] = &mut [1; 1000];
for i in 0..1000 {
x[i] = i;
for (i, r) in x.iter_mut().enumerate() {
*r = i;
}
b.iter(|| {
let mut s = 0;
Expand Down Expand Up @@ -172,11 +172,11 @@ macro_rules! sample_indices {
sample_indices!(misc_sample_indices_1_of_1k, sample, 1, 1000);
sample_indices!(misc_sample_indices_10_of_1k, sample, 10, 1000);
sample_indices!(misc_sample_indices_100_of_1k, sample, 100, 1000);
sample_indices!(misc_sample_indices_100_of_1M, sample, 100, 1000_000);
sample_indices!(misc_sample_indices_100_of_1G, sample, 100, 1000_000_000);
sample_indices!(misc_sample_indices_200_of_1G, sample, 200, 1000_000_000);
sample_indices!(misc_sample_indices_400_of_1G, sample, 400, 1000_000_000);
sample_indices!(misc_sample_indices_600_of_1G, sample, 600, 1000_000_000);
sample_indices!(misc_sample_indices_100_of_1M, sample, 100, 1_000_000);
sample_indices!(misc_sample_indices_100_of_1G, sample, 100, 1_000_000_000);
sample_indices!(misc_sample_indices_200_of_1G, sample, 200, 1_000_000_000);
sample_indices!(misc_sample_indices_400_of_1G, sample, 400, 1_000_000_000);
sample_indices!(misc_sample_indices_600_of_1G, sample, 600, 1_000_000_000);

macro_rules! sample_indices_rand_weights {
($name:ident, $amount:expr, $length:expr) => {
Expand All @@ -193,8 +193,8 @@ macro_rules! sample_indices_rand_weights {
sample_indices_rand_weights!(misc_sample_weighted_indices_1_of_1k, 1, 1000);
sample_indices_rand_weights!(misc_sample_weighted_indices_10_of_1k, 10, 1000);
sample_indices_rand_weights!(misc_sample_weighted_indices_100_of_1k, 100, 1000);
sample_indices_rand_weights!(misc_sample_weighted_indices_100_of_1M, 100, 1000_000);
sample_indices_rand_weights!(misc_sample_weighted_indices_200_of_1M, 200, 1000_000);
sample_indices_rand_weights!(misc_sample_weighted_indices_400_of_1M, 400, 1000_000);
sample_indices_rand_weights!(misc_sample_weighted_indices_600_of_1M, 600, 1000_000);
sample_indices_rand_weights!(misc_sample_weighted_indices_1k_of_1M, 1000, 1000_000);
sample_indices_rand_weights!(misc_sample_weighted_indices_100_of_1M, 100, 1_000_000);
sample_indices_rand_weights!(misc_sample_weighted_indices_200_of_1M, 200, 1_000_000);
sample_indices_rand_weights!(misc_sample_weighted_indices_400_of_1M, 400, 1_000_000);
sample_indices_rand_weights!(misc_sample_weighted_indices_600_of_1M, 600, 1_000_000);
sample_indices_rand_weights!(misc_sample_weighted_indices_1k_of_1M, 1000, 1_000_000);
2 changes: 1 addition & 1 deletion rand_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ mod test {
// This is the binomial distribution B(64, 0.5), so chance of
// weight < 20 is binocdf(19, 64, 0.5) = 7.8e-4, and same for
// weight > 44.
assert!(weight >= 20 && weight <= 44);
assert!((20..=44).contains(&weight));

for (i2, r2) in results.iter().enumerate() {
if i1 == i2 {
Expand Down
4 changes: 2 additions & 2 deletions rand_distr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ std_math = ["num-traits/std"]
[dev-dependencies]
rand_pcg = { version = "0.3.0", path = "../rand_pcg" }
# For inline examples
rand = { path = "..", version = "0.8.0", default-features = false, features = ["std_rng", "std"] }
rand = { path = "..", version = "0.8.0", default-features = false, features = ["std_rng", "std", "small_rng"] }
# Histogram implementation for testing uniformity
average = { version = "0.12", features = [ "std" ] }
average = { version = "0.13", features = [ "std" ] }
3 changes: 2 additions & 1 deletion rand_distr/benches/distributions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ distr_int!(distr_geometric, u64, Geometric::new(0.5).unwrap());
distr_int!(distr_standard_geometric, u64, StandardGeometric);

#[bench]
#[allow(clippy::approx_constant)]
fn dist_iter(b: &mut Bencher) {
let mut rng = Pcg64Mcg::from_entropy();
let distr = Normal::new(-2.71828, 3.14159).unwrap();
Expand Down Expand Up @@ -177,4 +178,4 @@ sample_binomial!(misc_binomial_1, 1, 0.9);
sample_binomial!(misc_binomial_10, 10, 0.9);
sample_binomial!(misc_binomial_100, 100, 0.99);
sample_binomial!(misc_binomial_1000, 1000, 0.01);
sample_binomial!(misc_binomial_1e12, 1000_000_000_000, 0.2);
sample_binomial!(misc_binomial_1e12, 1_000_000_000_000, 0.2);
6 changes: 3 additions & 3 deletions rand_distr/src/cauchy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,9 @@ mod test {
let mut rng = crate::test::rng(123);
let mut numbers: [f64; 1000] = [0.0; 1000];
let mut sum = 0.0;
for i in 0..1000 {
numbers[i] = cauchy.sample(&mut rng);
sum += numbers[i];
for number in &mut numbers[..] {
*number = cauchy.sample(&mut rng);
sum += *number;
}
let median = median(&mut numbers);
#[cfg(feature = "std")]
Expand Down
4 changes: 4 additions & 0 deletions rand_distr/src/gamma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@

//! The Gamma and derived distributions.

// We use the variable names from the published reference, therefore this
// warning is not helpful.
#![allow(clippy::many_single_char_names)]

use self::ChiSquaredRepr::*;
use self::GammaRepr::*;

Expand Down
2 changes: 1 addition & 1 deletion rand_distr/src/hypergeometric.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ impl Hypergeometric {
}
};

Ok(Hypergeometric { n1, n2, k, sign_x, offset_x, sampling_method })
Ok(Hypergeometric { n1, n2, k, offset_x, sign_x, sampling_method })
}
}

Expand Down
2 changes: 1 addition & 1 deletion rand_distr/src/weighted_alias.rs
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ mod test {
weights[ZERO_WEIGHT_INDEX as usize] = W::ZERO;
weights
};
let weight_sum = weights.iter().map(|w| *w).sum::<W>();
let weight_sum = weights.iter().copied().sum::<W>();
let expected_counts = weights
.iter()
.map(|&w| w_to_f64(w) / w_to_f64(weight_sum) * NUM_SAMPLES as f64)
Expand Down
86 changes: 86 additions & 0 deletions rand_distr/tests/pdf.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright 2021 Developers of the Rand project.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#![allow(clippy::float_cmp)]

use average::Histogram;
use rand::{Rng, SeedableRng};
use rand_distr::Normal;

const HIST_LEN: usize = 100;
average::define_histogram!(hist, crate::HIST_LEN);
use hist::Histogram as Histogram100;

mod sparkline;

#[test]
fn normal() {
const N_SAMPLES: u64 = 1_000_000;
const MEAN: f64 = 2.;
const STD_DEV: f64 = 0.5;
const MIN_X: f64 = -1.;
const MAX_X: f64 = 5.;

let dist = Normal::new(MEAN, STD_DEV).unwrap();
let mut hist = Histogram100::with_const_width(MIN_X,MAX_X);
let mut rng = rand::rngs::SmallRng::seed_from_u64(1);

for _ in 0..N_SAMPLES {
let _ = hist.add(rng.sample(dist)); // Ignore out-of-range values
}

println!("Sampled normal distribution:\n{}",
sparkline::render_u64_as_string(hist.bins()));

fn pdf(x: f64) -> f64 {
(-0.5 * ((x - MEAN) / STD_DEV).powi(2)).exp() /
(STD_DEV * (2. * core::f64::consts::PI).sqrt())
}

let mut bin_centers = hist.centers();
let mut expected = [0.; HIST_LEN];
for e in &mut expected[..] {
*e = pdf(bin_centers.next().unwrap());
}

println!("Expected normal distribution:\n{}",
sparkline::render_u64_as_string(hist.bins()));

let mut diff = [0.; HIST_LEN];
for (i, n) in hist.normalized_bins().enumerate() {
let bin = (n as f64) / (N_SAMPLES as f64) ;
diff[i] = (bin - expected[i]).abs();
}

println!("Difference:\n{}",
sparkline::render_f64_as_string(&diff[..]));
println!("max diff: {:?}", diff.iter().fold(
core::f64::NEG_INFINITY, |a, &b| a.max(b)));

// Check that the differences are significantly smaller than the expected error.
let mut expected_error = [0.; HIST_LEN];
// Calculate error from histogram
for (err, var) in expected_error.iter_mut().zip(hist.variances()) {
*err = var.sqrt() / (N_SAMPLES as f64);
}
// Normalize error by bin width
for (err, width) in expected_error.iter_mut().zip(hist.widths()) {
*err /= width;
}
// TODO: Calculate error from distribution cutoff / normalization

println!("max expected_error: {:?}", expected_error.iter().fold(
core::f64::NEG_INFINITY, |a, &b| a.max(b)));
for (&d, &e) in diff.iter().zip(expected_error.iter()) {
// Difference larger than 3 standard deviations or cutoff
let tol = (3. * e).max(1e-4);
if d > tol {
panic!("Difference = {} * tol", d / tol);
}
}
}
128 changes: 128 additions & 0 deletions rand_distr/tests/sparkline.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
// Copyright 2021 Developers of the Rand project.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

/// Number of ticks.
const N: usize = 8;
/// Ticks used for the sparkline.
static TICKS: [char; N] = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'];

/// Render a sparkline of `data` into `buffer`.
pub fn render_u64(data: &[u64], buffer: &mut String) {
match data.len() {
0 => {
return;
},
1 => {
if data[0] == 0 {
buffer.push(TICKS[0]);
} else {
buffer.push(TICKS[N - 1]);
}
return;
},
_ => {},
}
let max = data.iter().max().unwrap();
let min = data.iter().min().unwrap();
let scale = ((N - 1) as f64) / ((max - min) as f64);
for i in data {
let tick = (((i - min) as f64) * scale) as usize;
buffer.push(TICKS[tick]);
}
}

/// Calculate the required capacity for the sparkline, given the length of the
/// input data.
pub fn required_capacity(len: usize) -> usize {
len * TICKS[0].len_utf8()
}

/// Render a sparkline of `data` into a newly allocated string.
pub fn render_u64_as_string(data: &[u64]) -> String {
let cap = required_capacity(data.len());
let mut s = String::with_capacity(cap);
render_u64(data, &mut s);
debug_assert_eq!(s.capacity(), cap);
s
}

/// Render a sparkline of `data` into `buffer`.
pub fn render_f64(data: &[f64], buffer: &mut String) {
match data.len() {
0 => {
return;
},
1 => {
if data[0] == 0. {
buffer.push(TICKS[0]);
} else {
buffer.push(TICKS[N - 1]);
}
return;
},
_ => {},
}
for x in data {
assert!(x.is_finite(), "can only render finite values");
}
let max = data.iter().fold(
core::f64::NEG_INFINITY, |a, &b| a.max(b));
let min = data.iter().fold(
core::f64::INFINITY, |a, &b| a.min(b));
let scale = ((N - 1) as f64) / (max - min);
for x in data {
let tick = ((x - min) * scale) as usize;
buffer.push(TICKS[tick]);
}
}

/// Render a sparkline of `data` into a newly allocated string.
pub fn render_f64_as_string(data: &[f64]) -> String {
let cap = required_capacity(data.len());
let mut s = String::with_capacity(cap);
render_f64(data, &mut s);
debug_assert_eq!(s.capacity(), cap);
s
}

#[cfg(test)]
mod tests {
#[test]
fn render_u64() {
let data = [2, 250, 670, 890, 2, 430, 11, 908, 123, 57];
let mut s = String::with_capacity(super::required_capacity(data.len()));
super::render_u64(&data, &mut s);
println!("{}", s);
assert_eq!("▁▂▆▇▁▄▁█▁▁", &s);
}

#[test]
fn render_u64_as_string() {
let data = [2, 250, 670, 890, 2, 430, 11, 908, 123, 57];
let s = super::render_u64_as_string(&data);
println!("{}", s);
assert_eq!("▁▂▆▇▁▄▁█▁▁", &s);
}

#[test]
fn render_f64() {
let data = [2., 250., 670., 890., 2., 430., 11., 908., 123., 57.];
let mut s = String::with_capacity(super::required_capacity(data.len()));
super::render_f64(&data, &mut s);
println!("{}", s);
assert_eq!("▁▂▆▇▁▄▁█▁▁", &s);
}

#[test]
fn render_f64_as_string() {
let data = [2., 250., 670., 890., 2., 430., 11., 908., 123., 57.];
let s = super::render_f64_as_string(&data);
println!("{}", s);
assert_eq!("▁▂▆▇▁▄▁█▁▁", &s);
}
}
2 changes: 2 additions & 0 deletions rand_distr/tests/uniformity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

#![allow(clippy::float_cmp)]

use average::Histogram;
use rand::prelude::*;

Expand Down
6 changes: 6 additions & 0 deletions rand_hc/src/hc128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.

// Disable some noisy clippy lints.
#![allow(clippy::many_single_char_names)]
#![allow(clippy::identity_op)]
// Disable a lint that cannot be fixed without increasing the MSRV
#![allow(clippy::op_ref)]

//! The HC-128 random number generator.

use core::fmt;
Expand Down
5 changes: 4 additions & 1 deletion src/distributions/bernoulli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ impl Bernoulli {
/// 2<sup>-64</sup> in `[0, 1]` can be represented as a `f64`.)
#[inline]
pub fn new(p: f64) -> Result<Bernoulli, BernoulliError> {
if !(p >= 0.0 && p < 1.0) {
if !(0.0..1.0).contains(&p) {
if p == 1.0 {
return Ok(Bernoulli { p_int: ALWAYS_TRUE });
}
Expand Down Expand Up @@ -157,6 +157,9 @@ mod test {

#[test]
fn test_trivial() {
// We prefer to be explicit here.
#![allow(clippy::bool_assert_comparison)]
dhardy marked this conversation as resolved.
Show resolved Hide resolved

let mut r = crate::test::rng(1);
let always_false = Bernoulli::new(0.0).unwrap();
let always_true = Bernoulli::new(1.0).unwrap();
Expand Down
Loading