15 changes: 15 additions & 0 deletions CHANGES.rst
@@ -0,0 +1,15 @@
Changelog
=========

0.1.1 (2021-12-18)
------------------

Fixes
~~~~~
- ``standard_normal`` sometimes produces ``inf`` [#1]


0.1 (2021-12-06)
----------------

- Initial release
12 changes: 6 additions & 6 deletions README.md
@@ -17,13 +17,13 @@ seed = 123
parallel_rng = parallel_numpy_rng.default_rng(seed)
numpy_rng = np.random.default_rng(seed)

%timeit numpy_rng.random(size=10**9, dtype=np.float32) # 2.89 s
%timeit parallel_rng.random(size=10**9, dtype=np.float32, nthread=1) # 3.35 s
%timeit parallel_rng.random(size=10**9, dtype=np.float32, nthread=128) # 73.9 ms
%timeit numpy_rng.random(size=10**9, dtype=np.float32) # 2.85 s
%timeit parallel_rng.random(size=10**9, dtype=np.float32, nthread=1) # 3.34 s
%timeit parallel_rng.random(size=10**9, dtype=np.float32, nthread=128) # 67.8 ms

%timeit numpy_rng.standard_normal(size=10**8, dtype=np.float32) # 1.13 s
%timeit parallel_rng.standard_normal(size=10**8,dtype=np.float32, nthread=1) # 1.87 s
%timeit parallel_rng.standard_normal(size=10**8, dtype=np.float32, nthread=128) # 36.6 ms
%timeit numpy_rng.standard_normal(size=10**8, dtype=np.float32) # 1.12 s
%timeit parallel_rng.standard_normal(size=10**8,dtype=np.float32, nthread=1) # 1.85 s
%timeit parallel_rng.standard_normal(size=10**8, dtype=np.float32, nthread=128) # 43.5 ms
```

Note that the `parallel_rng` is slower than Numpy when using a single thread, because the parallel implementation requires a slower algorithm in some cases (this is especially noticeable for float64 and for normals).
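
The payoff for that single-thread overhead is reproducibility across thread counts: the values a given seed produces are intended not to depend on `nthread`. Below is a minimal sketch of that property, assuming the `default_rng` API shown above; the equality check and variable names are illustrative, not part of the README.

```python
import numpy as np
import parallel_numpy_rng

seed = 123
rng_a = parallel_numpy_rng.default_rng(seed)
rng_b = parallel_numpy_rng.default_rng(seed)

# Same seed, different thread counts: the streams should be identical
# if the generator is thread-count independent, as designed
a = rng_a.random(size=10**6, dtype=np.float32, nthread=1)
b = rng_b.random(size=10**6, dtype=np.float32, nthread=8)
assert np.array_equal(a, b)
```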
69 changes: 43 additions & 26 deletions parallel_numpy_rng.py
@@ -27,7 +27,7 @@ class MTGenerator:

Usage
-----
```python
```
p = np.random.PCG64(123) # or PCG64DXSM
mtg = MTGenerator(p)
r1 = mtg.random(size=16, nthread=2, dtype=np.float32)
@@ -58,7 +58,9 @@ def __init__(self, bit_generator, nthread=-1):
            nthread = len(os.sched_getaffinity(0))
        self.nthread = nthread

        self._next_float = _next_float[bit_generator.state['bit_generator']]
        # each is a dict, keyed on dtype
        self._next_float = _next_float[bit_generator.state['bit_generator']]['zero']
        self._next_float_nonzero = _next_float[bit_generator.state['bit_generator']]['nonzero']

        self._cached_normal = None

@@ -81,12 +83,8 @@ def random(self, size=None, nthread=None, out=None, verify_rng=True, dtype=np.fl

        if out is None:
            out = np.empty(size, dtype=dtype)
        if dtype == np.float64:
            _random(states, starts, out, self.bitgen.ctypes.next_double)
        elif dtype == np.float32:
            _random(states, starts, out, self._next_float)
        else:
            raise ValueError(dtype)
        next_float = self._next_float[dtype]
        _random(states, starts, out, next_float)

        if verify_rng:
            # Did we advance each RNG by the right amount?
@@ -159,12 +157,8 @@ def standard_normal(self, size=None, nthread=None, out=None, verify_rng=True, dt

        # now offset the out array
        _out = out[first:]
        if dtype == np.float64:
            _cached_normal = _boxmuller(states, ff, _out, self.bitgen.ctypes.next_double)
        elif dtype == np.float32:
            _cached_normal = _boxmuller(states, ff, _out, self._next_float)
        else:
            raise ValueError(dtype)
        next_float_nonzero = self._next_float_nonzero[dtype]
        _cached_normal = _boxmuller(states, ff, _out, next_float_nonzero)
        if not np.isnan(_cached_normal):
            self._cached_normal = _cached_normal
        del _out
@@ -236,19 +230,42 @@ def _boxmuller(states, starts, out, next_double):

    return cache[0]


# initialize some numba functions
_next_uint32_pcg64 = np.random.PCG64().ctypes.next_uint32
@njit(fastmath=True)
def _next_float_pcg64(state):
    return np.float32(np.int32(_next_uint32_pcg64(state) >> np.uint32(8)) * (np.float32(1.) / np.float32(16777216.)))
# TODO: there are now enough of these that they should live in their own module
# TODO: for very low latency cases, there may be benefit to inlining these

_next_float={'PCG64':_next_float_pcg64}
def _generate_int_to_float(bitgen):
    # initialize the numba functions to convert ints to floats
    _next_uint32_pcg64 = bitgen().ctypes.next_uint32
    _next_uint64_pcg64 = bitgen().ctypes.next_uint64

    @njit(fastmath=True)
    def _next_float_pcg64(state):
        '''Random float in the semi-open interval [0,1)'''
        return np.float32(np.int32(_next_uint32_pcg64(state) >> np.uint32(8)) * (np.float32(1.) / np.float32(16777216.)))

    @njit(fastmath=True)
    def _next_float_pcg64_nonzero(state):
        '''Random float in the semi-open interval (0,1]'''
        return np.float32((np.int32(_next_uint32_pcg64(state) >> np.uint32(8)) + np.int32(1)) * (np.float32(1.) / np.float32(16777216.)))

    @njit(fastmath=True)
    def _next_double_pcg64(state):
        '''Random double in the semi-open interval [0,1)'''
        return np.float64(np.int64(_next_uint64_pcg64(state) >> np.uint64(11)) * (np.float64(1.) / np.float64(9007199254740992.)))

    @njit(fastmath=True)
    def _next_double_pcg64_nonzero(state):
        '''Random double in the semi-open interval (0,1]'''
        return np.float64((np.int64(_next_uint64_pcg64(state) >> np.uint64(11)) + np.int64(1)) * (np.float64(1.) / np.float64(9007199254740992.)))

    _next_float = {'zero': {np.float32: _next_float_pcg64, np.float64: _next_double_pcg64},
                   'nonzero': {np.float32: _next_float_pcg64_nonzero, np.float64: _next_double_pcg64_nonzero},
                   }
    return _next_float

_next_float = {}
_next_float['PCG64'] = _generate_int_to_float(np.random.PCG64)

if hasattr(np.random, 'PCG64DXSM'):
    # Numpy >= 1.21
    _next_uint32_pcg64dxsm = np.random.PCG64DXSM().ctypes.next_uint32
    @njit(fastmath=True)
    def _next_float_pcg64dxsm(state):
        return np.float32(np.int32(_next_uint32_pcg64dxsm(state) >> np.uint32(8)) * (np.float32(1.) / np.float32(16777216.)))
    _next_float['PCG64DXSM'] = _next_float_pcg64dxsm
    _next_float['PCG64DXSM'] = _generate_int_to_float(np.random.PCG64DXSM)
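
For reference on the conversion the new helpers implement: a 32-bit draw is shifted right by 8 to keep 24 random bits (the width of a float32 mantissa) and scaled by 2**-24, which lands in [0, 1); adding 1 before scaling instead yields (0, 1], so Box-Muller never sees an exact zero. The 64-bit path does the same with a shift of 11 and a scale of 2**-53. A quick worked check of the two float32 ranges, separate from the diff above (`SCALE32` is just a local name for the demo):

```python
import numpy as np

SCALE32 = np.float32(1.0) / np.float32(16777216.0)   # 2**-24

# [0, 1): the smallest 24-bit value (0) maps to exactly 0.0
print(np.float32(np.int32(0)) * SCALE32)              # 0.0
print(np.float32(np.int32(2**24 - 1)) * SCALE32)      # 0.99999994

# (0, 1]: adding 1 before scaling means exact 0 can no longer occur
print(np.float32(np.int32(0) + np.int32(1)) * SCALE32)          # 5.9604645e-08
print(np.float32(np.int32(2**24 - 1) + np.int32(1)) * SCALE32)  # 1.0
```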
18 changes: 18 additions & 0 deletions test_parallel_numpy_rng.py
@@ -135,3 +135,21 @@ def test_uniform_matches_numpy(someN, seed, nthread, dtype):
        assert np.allclose(a, b, atol=1e-7)
    else:
        raise ValueError(dtype)


def test_finite_normals_float32():
    '''
    If the floats fed to Box-Muller can include 0, it will produce infinity.
    We use the interval (0,1] to avoid this.

    In theory, we ought to test float64 the same way. But it's hard to find
    a PCG state that produces 53 zeros...

    https://github.com/lgarrison/parallel-numpy-rng/issues/1
    '''
    from parallel_numpy_rng import MTGenerator
    pcg = np.random.PCG64(1194)
    mtg = MTGenerator(pcg)
    a = mtg.standard_normal(size=20000, nthread=maxthreads, dtype=np.float32)
    assert np.all(np.isfinite(a))
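
The failure mode this test guards against follows directly from the Box-Muller transform, which computes sqrt(-2 * ln(u)) from a uniform draw u: if u can be exactly 0, the result is infinite, while the smallest value in (0, 1] stays finite. A small illustration of that edge case, not part of the test suite (the variable names are only for the demo):

```python
import numpy as np

# u = 0 is the problematic input: log(0) -> -inf, so the radius term blows up
with np.errstate(divide='ignore'):
    r_bad = np.sqrt(np.float32(-2.0) * np.log(np.float32(0.0)))
print(r_bad)  # inf

# The smallest float32 the (0, 1] conversion can produce keeps the radius finite
u_min = np.float32(1.0) / np.float32(2**24)
r_ok = np.sqrt(np.float32(-2.0) * np.log(u_min))
print(r_ok)  # ~5.77
```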