Skip to content

Commit

Permalink
Allow sampling from a closed integer range
Browse files Browse the repository at this point in the history
These changes make it possible to sample from closed ranges, not only from open.

Included is a small optimisation for the modulus operator, and an optimisation
for the types i8/u8 and i16/u16.
  • Loading branch information
Paul Dicker committed Sep 2, 2017
1 parent 33a1e7f commit fc0a6e5
Showing 1 changed file with 113 additions and 52 deletions.
165 changes: 113 additions & 52 deletions src/distributions/range2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,20 @@
// except according to those terms.

//! Alternative design for `Range`.
//!
//!
//! TODO: decide whether to replace the old `range` module with this.
//! Advantage: float ranges don't have to store a "zone" parameter.
//! Advantage: custom implementations can store extra parameters.
//! Possible advantage: easier implementations for custom types written in
//! terms of implementations of other types.
//! Disadvantage: complex?
//!
//!
//! This is *almost* like having separate `RangeInt<T>`, `RangeFloat<T>`,
//! etc. (or just `RangeI32`, etc.) types, each implementing `Distribution`,
//! but it adds some magic to support generic `range` and `new_range` methods.
use core::num::Wrapping as w;

use Rng;
use distributions::{Distribution, Uniform01, Rand};
use distributions::{Distribution, uniform, Uniform01, Rand};

/// Sample values uniformly between two bounds.
///
Expand All @@ -39,7 +37,7 @@ use distributions::{Distribution, Uniform01, Rand};
/// including `high`, but this may be very difficult. All the
/// primitive integer types satisfy this property, and the float types
/// normally satisfy it, but rounding may mean `high` can occur.
///
///
/// # Example
///
/// ```rust
Expand Down Expand Up @@ -87,13 +85,15 @@ pub trait SampleRange: PartialOrd+Sized {
pub trait RangeImpl {
/// The type sampled by this implementation.
type X: PartialOrd;

/// Construct self.
///
///
/// This should not be called directly. `Range::new` asserts that
/// `low < high` before calling this.
fn new(low: Self::X, high: Self::X) -> Self;


fn new_inclusive(low: Self::X, high: Self::X) -> Self;

/// Sample a value.
fn sample<R: Rng+?Sized>(&self, rng: &mut R) -> Self::X;
}
Expand All @@ -107,35 +107,73 @@ pub struct RangeInt<X> {
}

macro_rules! range_int_impl {
($ty:ty, $unsigned:ident) => {
($ty:ty, $signed:ident, $unsigned:ident,
$signed_lg:ident, $unsigned_lg:ident) => {
impl SampleRange for $ty {
type T = RangeInt<$ty>;
}

impl RangeImpl for RangeInt<$ty> {
// we play free and fast with unsigned vs signed here
// We play free and fast with unsigned vs signed here
// (when $ty is signed), but that's fine, since the
// contract of this macro is for $ty and $unsigned to be
// "bit-equal", so casting between them is a no-op & a
// bijection.
// "bit-equal", so casting between them is a no-op.

type X = $ty;

fn new(low: Self::X, high: Self::X) -> Self {
let range = (w(high as $unsigned) - w(low as $unsigned)).0;
let unsigned_max: $unsigned = ::core::$unsigned::MAX;

// We want to calculate type_range % range where type_range is
// pow(2, n_bits($ty)), but we can't represent type_range.
// (type_range - range) % range is equivalent, since we know
// type_range > range. Since range >= 1,
// type_range - range = (unsigned_max - range) + 1.
let ignore_zone = ((unsigned_max - range) + 1) % range;
// We want to sample from the zone
// [0, (type_range - ignore_zone))
// however, ignore_zone may be zero. Instead use a closed range
// from zero to:
let zone = unsigned_max - ignore_zone;
RangeImpl::new_inclusive(low, high - 1)
}

fn new_inclusive(low: Self::X, high: Self::X) -> Self {
// For a closed range the number of possible numbers we should
// generate is `range = (high - low + 1)`. It is not possible to
// end up with a uniform distribution if we map _all_ the random
// integers that can be generated to this range. We have to map
// integers from a `zone` that is a multiple of the range. The
// rest of the integers, that cause a bias, are rejected. The
// sampled number is `zone % range`.
//
// The problem with `range` is that to cover the full range of
// the type, it has to store `unsigned_max + 1`, which can't be
// represented. But if the range covers the full range of the
// type, no modulus is needed. A range of size 0 can't exist, so
// we use that to represent this special case. Wrapping
// arithmetic even makes representing `unsigned_max + 1` as 0
// simple.
//
// We don't calculate zone directly, but first calculate the
// number of integers to reject. To handle `unsigned_max + 1`
// not fitting in the type, we use:
// ints_to_reject = (unsigned_max + 1) % range;
// ints_to_reject = (unsigned_max - range + 1) % range;
//
// The smallest integer prngs generate is u32. That is why for
// small integer sizes (i8/u8 and i16/u16) there is an
// optimisation: don't pick the largest zone that can fit in the
// small type, but pick the largest zone that can fit in an u32.
// This improves the chance to get a random integer that fits in
// the zone to 998 in 1000 in the worst case.
//
// There is a problem however: we can't store such a large range
// in `RangeInt`, that can only hold values of the size of $ty.
// `ints_to_reject` is always less than half the size of the
// small integer. For an u8 it only ever uses 7 bits. This means
// that all but the last 7 bits of `zone` are always 1's (or 15
// in the case of u16). So nothing is lost by trucating `zone`.

let unsigned_max: $unsigned_lg = ::core::$unsigned_lg::MAX;

let range = (high as $unsigned_lg)
.wrapping_sub(low as $unsigned_lg)
.wrapping_add(1);
let ints_to_reject =
if range > 0 {
(unsigned_max - range + 1) % range
} else {
0
};
let zone = unsigned_max - ints_to_reject;

RangeInt {
low: low,
Expand All @@ -144,36 +182,50 @@ macro_rules! range_int_impl {
zone: zone as $ty
}
}

fn sample<R: Rng+?Sized>(&self, rng: &mut R) -> Self::X {
use $crate::distributions::uniform;
let range = self.range as $unsigned as $unsigned_lg;
// Some casting to recover the trucated bits of `zone`:
// First bit-cast to a signed int. Next sign-extend to the
// larger type. Then bit-cast to unsigned.
// For types that already have the right size, all the casting
// is a no-op.
let zone = self.zone as $signed as $signed_lg as $unsigned_lg;
let mut v: $unsigned_lg;
loop {
let v: $unsigned = uniform(rng);
// Reject samples not between 0 and zone:
if v <= self.zone as $unsigned {
// Adjustment sample for range and low value:
return (w(self.low) + w((v % self.range as $unsigned) as $ty)).0;
}
v = uniform(rng);
if v <= zone { break; }
}
if range > 0 {
// Checking if `range > 0` makes it possible for the
// optimizer to remove the overflow check and possible panic
// from the modulus operation. The makes the function
// 5%~100% faster, depending on the type.
self.low.wrapping_add((v % range) as $ty)
} else {
// `range == 0` means we sample from the entire integer
// range. No need for adjustments.
v as $ty
}
}
}
}
}

range_int_impl! { i8, u8 }
range_int_impl! { i16, u16 }
range_int_impl! { i32, u32 }
range_int_impl! { i64, u64 }
range_int_impl! { i8, i8, u8, i32, u32 }
range_int_impl! { i16, i16, u16, i32, u32 }
range_int_impl! { i32, i32, u32, i32, u32 }
range_int_impl! { i64, i64, u64, i64, u64 }
#[cfg(feature = "i128_support")]
range_int_impl! { i128, u128 }
range_int_impl! { isize, usize }
range_int_impl! { u8, u8 }
range_int_impl! { u16, u16 }
range_int_impl! { u32, u32 }
range_int_impl! { u64, u64 }
range_int_impl! { i128, i128, u128, u128 }
range_int_impl! { isize, isize, usize, isize, usize }
range_int_impl! { u8, i8, u8, i32, u32 }
range_int_impl! { u16, i16, u16, i32, u32 }
range_int_impl! { u32, i32, u32, i32, u32 }
range_int_impl! { u64, i64, u64, i64, u64 }
#[cfg(feature = "i128_support")]
range_int_impl! { u128, u128 }
range_int_impl! { usize, usize }
range_int_impl! { u128, u128, u128, i128, u128 }
range_int_impl! { usize, isize, usize, isize, usize }

/// Implementation of `RangeImpl` for float types.
#[derive(Clone, Copy, Debug)]
Expand All @@ -187,17 +239,23 @@ macro_rules! range_float_impl {
impl SampleRange for $ty {
type T = RangeFloat<$ty>;
}

impl RangeImpl for RangeFloat<$ty> {
type X = $ty;

fn new(low: Self::X, high: Self::X) -> Self {
RangeFloat {
low: low,
range: high - low,
}
}


fn new_inclusive(low: Self::X, high: Self::X) -> Self {
// Same as `new`, because the boundaries of a floats range are
// (at least currently) not exact due to rounding errors.
RangeImpl::new(low, high)
}

fn sample<R: Rng+?Sized>(&self, rng: &mut R) -> Self::X {
let x: $ty = Rand::rand(rng, Uniform01);
self.low + self.range * x
Expand Down Expand Up @@ -324,6 +382,9 @@ mod tests {
inner: RangeFloat::<f32>::new(low.x, high.x),
}
}
fn new_inclusive(low: Self::X, high: Self::X) -> Self {
RangeImpl::new(low, high)
}
fn sample<R: Rng+?Sized>(&self, rng: &mut R) -> Self::X {
MyF32 { x: self.inner.sample(rng) }
}
Expand Down

0 comments on commit fc0a6e5

Please sign in to comment.