From 88874a4b65b9eab78c45c7543e89e9bb506f5414 Mon Sep 17 00:00:00 2001 From: Amanda Lund Date: Tue, 5 Dec 2023 01:40:45 +0000 Subject: [PATCH 1/6] Implement jump ahead for XORWOW RNG --- src/celeritas/random/XorwowRngData.hh | 28 ++- src/celeritas/random/XorwowRngEngine.hh | 233 ++++++++++++++++++++++-- src/celeritas/random/XorwowRngParams.cc | 53 ++++++ src/celeritas/random/XorwowRngParams.hh | 13 ++ 4 files changed, 312 insertions(+), 15 deletions(-) diff --git a/src/celeritas/random/XorwowRngData.hh b/src/celeritas/random/XorwowRngData.hh index 14f070febe..6e83e487bd 100644 --- a/src/celeritas/random/XorwowRngData.hh +++ b/src/celeritas/random/XorwowRngData.hh @@ -25,12 +25,27 @@ namespace celeritas template struct XorwowRngParamsData { + //// TYPES //// + + using uint_t = unsigned int; + using JumpPoly = Array; + using ArrayJumpPoly = Array; + + //// DATA //// + // TODO: 256-bit seed used to generate initial states for the RNGs // For now, just 4 bytes (same as our existing cuda/hip interface) - Array seed; + Array seed; + + // Jump polynomials + ArrayJumpPoly jump; + ArrayJumpPoly jump_subsequence; //// METHODS //// + static CELER_CONSTEXPR_FUNCTION size_type num_words() { return 5; } + static CELER_CONSTEXPR_FUNCTION size_type num_bits() { return 32; } + //! Whether the data is assigned explicit CELER_FUNCTION operator bool() const { return true; } @@ -44,6 +59,17 @@ struct XorwowRngParamsData } }; +//---------------------------------------------------------------------------// +/*! + * Initialize an RNG. + */ +struct XorwowRngInitializer +{ + ull_int seed{0}; + ull_int subsequence{0}; + ull_int offset{0}; +}; + //---------------------------------------------------------------------------// //! Individual RNG state struct XorwowState diff --git a/src/celeritas/random/XorwowRngEngine.hh b/src/celeritas/random/XorwowRngEngine.hh index 9bf64dd86b..37aaadac30 100644 --- a/src/celeritas/random/XorwowRngEngine.hh +++ b/src/celeritas/random/XorwowRngEngine.hh @@ -26,22 +26,29 @@ namespace celeritas * sampling of uniform floating point data is done with specializations to the * GenerateCanonical class. * - * This class does not define an initializer because it is assumed that the - * state has been fully randomized at initialization (see the \c resize - * function for \c XorwowRngStateData.) + * The \c resize function for \c XorwowRngStateData will fully randomize the + * state at initialization. Alternatively, the state can be initialized with a + * seed, subsequence, and offset. * * See Marsaglia (2003) for the theory underlying the algorithm and the the * "example" \c xorwow that combines an \em xorshift output with a Weyl - * sequence. + * sequence (https://www.jstatsoft.org/index.php/jss/article/view/v008i14/916). * - * https://www.jstatsoft.org/index.php/jss/article/view/v008i14/916 + * For a description of the jump ahead method using the polynomial + * representation of the recurrance, see: Haramoto, H., Matsumoto, M., + * Nishimura, T., Panneton, F., L’Ecuyer, P. 2008. "Efficient jump ahead for + * F2-linear random number generators". INFORMS Journal on Computing. + * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251 */ class XorwowRngEngine { public: //!@{ //! \name Type aliases - using result_type = unsigned int; + using uint_t = unsigned int; + using result_type = uint_t; + using Initializer_t = XorwowRngInitializer; + using ParamsRef = NativeCRef; using StateRef = NativeRef; //!@} @@ -51,15 +58,48 @@ class XorwowRngEngine //! Highest value potentially generated static CELER_CONSTEXPR_FUNCTION result_type max() { return 0xffffffffu; } - // Construct from state - inline CELER_FUNCTION - XorwowRngEngine(StateRef const& state, TrackSlotId const& id); + // Construct from state and persistent data + inline CELER_FUNCTION XorwowRngEngine(ParamsRef const& params, + StateRef const& state, + TrackSlotId tid); + + // Initialize state + inline CELER_FUNCTION XorwowRngEngine& operator=(Initializer_t const&); // Generate a 32-bit pseudorandom number inline CELER_FUNCTION result_type operator()(); + // Apply the transformation to the state + inline CELER_FUNCTION void next(); + + // Jump ahead \c n steps + inline CELER_FUNCTION void discard(ull_int n); + + // Jump ahead \c n subsequences (\c n * 2^67 steps) + inline CELER_FUNCTION void discard_subsequence(ull_int n); + private: + /// TYPES /// + + using JumpPoly = Array; + using ArrayJumpPoly = Array; + + /// DATA /// + + ParamsRef const& params_; XorwowState* state_; + + //// HELPER FUNCTIONS //// + + inline CELER_FUNCTION void jump(ull_int, ArrayJumpPoly const&); + inline CELER_FUNCTION void jump(JumpPoly const&); + + // Helper RNG for initializing the state + struct SplitMix64 + { + uint64_t state; + inline CELER_FUNCTION uint64_t operator()(); + }; }; //---------------------------------------------------------------------------// @@ -88,13 +128,53 @@ class GenerateCanonical // INLINE DEFINITIONS //---------------------------------------------------------------------------// /*! - * Construct from state. + * Construct from state and persistent data. */ CELER_FUNCTION -XorwowRngEngine::XorwowRngEngine(StateRef const& state, TrackSlotId const& id) +XorwowRngEngine::XorwowRngEngine(ParamsRef const& params, + StateRef const& state, + TrackSlotId tid) + : params_(params) { - CELER_EXPECT(id < state.state.size()); - state_ = &state.state[id]; + CELER_EXPECT(tid < state.state.size()); + state_ = &state.state[tid]; +} + +//---------------------------------------------------------------------------// +/*! + * Initialize the RNG engine. + * + * This moves the state ahead to the given subsequence (a subsequence has size + * 2^67) and skips \c offset random numbers. + * + * It is recommended to initialize the state using a very different generator + * from the one being initialized to avoid correlations. Here the 64-bit + * SplitMis64 generator is used for initialization (See Matsumoto, Wada, + * Kuramoto, and Ashihara, "Common defects in initialization of pseudorandom + * number generators". https://dl.acm.org/doi/10.1145/1276927.1276928.) + */ +CELER_FUNCTION XorwowRngEngine& +XorwowRngEngine::operator=(Initializer_t const& init) +{ + auto& s = state_->xorstate; + + // Initialize the state from the seed + SplitMix64 rng{init.seed}; + uint64_t seed = rng(); + s[0] = static_cast(seed); + s[1] = static_cast(seed >> 32); + seed = rng(); + s[2] = static_cast(seed); + s[3] = static_cast(seed >> 32); + seed = rng(); + s[4] = static_cast(seed); + state_->weylstate = static_cast(seed >> 32); + + // Skip ahead + this->discard_subsequence(init.subsequence); + this->discard(init.offset); + + return *this; } //---------------------------------------------------------------------------// @@ -102,6 +182,16 @@ XorwowRngEngine::XorwowRngEngine(StateRef const& state, TrackSlotId const& id) * Generate a 32-bit pseudorandom number using the 'xorwow' engine. */ CELER_FUNCTION auto XorwowRngEngine::operator()() -> result_type +{ + this->next(); + return state_->weylstate + state_->xorstate[4]; +} + +//---------------------------------------------------------------------------// +/*! + * Apply the transformation to the state. + */ +CELER_FUNCTION void XorwowRngEngine::next() { auto& s = state_->xorstate; auto const t = (s[0] ^ (s[0] >> 2u)); @@ -113,7 +203,122 @@ CELER_FUNCTION auto XorwowRngEngine::operator()() -> result_type s[4] = (s[4] ^ (s[4] << 4u)) ^ (t ^ (t << 1u)); state_->weylstate += 362437u; - return state_->weylstate + s[4]; +} + +//---------------------------------------------------------------------------// +/*! + * Advance the state \c n steps. + */ +CELER_FUNCTION void XorwowRngEngine::discard(ull_int n) +{ + this->jump(n, params_.jump); + state_->weylstate += static_cast(n) * 362437u; +} + +//---------------------------------------------------------------------------// +/*! + * Advance the state \c n subsequences (\c n * 2^67 steps). + * + * Note that the Weyl sequence value remains the same since it has period 2^32 + * which divides evenly into 2^67. + */ +CELER_FUNCTION void XorwowRngEngine::discard_subsequence(ull_int n) +{ + this->jump(n, params_.jump_subsequence); +} + +//---------------------------------------------------------------------------// +/*! + * Jump ahead \c n steps or subsequences. + * + * This applies the jump polynomials until the given number of steps or + * subsequences have been skipped. + */ +CELER_FUNCTION void +XorwowRngEngine::jump(ull_int n, ArrayJumpPoly const& jump_poly_arr) +{ + // Maximum number of times to apply any jump polynomial. Since the jump + // sizes are 4^i for i = [0, 10), the max is 3. + constexpr size_type max_num_jump = 3; + + // Start with the smallest jump (either one step or one subsequence) + size_type jp_idx = 0; + while (n > 0) + { + // Number of times to apply this jump polynomial + uint_t num_jump = static_cast(n) & max_num_jump; + for (size_type i = 0; i < num_jump; ++i) + { + this->jump(jump_poly_arr[jp_idx]); + } + ++jp_idx; + n >>= 2; + } +} + +//---------------------------------------------------------------------------// +/*! + * Jump ahead using the given jump polynomial. + * + * This uses the polynomial representation to apply the recurrance to the + * state. The theory is described in + * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251. Let + * \f[ + g(z) = z^d \mod p(z) = a_1 z^{k-1} + ... + a_{k-1} z + a_k, + * \f] + * where \f$ p(z) = det(zI + T) \f$ is the characteristic polynomial and \f$ T + * \f$ is the transformation matrix. Observing that \f$ g(z) = z^d q(z)p(z) \f$ + * for some polynomial \f$ q(z) \f$ and that \f$ p(T) = 0 \f$ (a fundamental + * property of the characteristic polynomial, it follows that + * \f[ + g(T) = T^d = a_1 A^{k-1} + ... + a_{k-1} A + a_k I. + * \f] + * Therefore, using the precalculated coefficients of the jump polynomial \f$ + * g(z) \f$ and Horner's method for polynomial evaluation, the state after \f$ + * d \f$ steps can be computed as + * \f[ + T^d x = T(...T(T(T a_1 x + a_2 x) + a_3 x) + ... + a_{k-1} x) + a_k x. + * \f] + * Note that applying \f$ T \f$ to \f$ x \f$ is equivalent to calling \c + * next(), and that in \f$ F_2 \f$, the finite field with two elements, + * addition is the same as subtraction and equivalent to bitwise exclusive or. + * Multiplication is bitwise and. + */ +CELER_FUNCTION void XorwowRngEngine::jump(JumpPoly const& jump_poly) +{ + constexpr size_type num_bits = 32; + constexpr size_type num_words = 5; + + Array s = {0}; + for (size_type i : range(num_words)) + { + for (size_type j : range(num_bits)) + { + if (jump_poly[i] & (1 << j)) + { + for (size_type k : range(num_words)) + { + s[k] ^= state_->xorstate[k]; + } + } + this->next(); + } + } + state_->xorstate = s; +} + +//---------------------------------------------------------------------------// +/*! + * Generate a 64-bit pseudorandom number using the SplitMix64 engine. + * + * This is used to initialize the XORWOW state. See https://prng.di.unimi.it. + */ +CELER_FUNCTION uint64_t XorwowRngEngine::SplitMix64::operator()() +{ + uint64_t z = (state += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + return z ^ (z >> 31); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/random/XorwowRngParams.cc b/src/celeritas/random/XorwowRngParams.cc index 25fc1fe9cc..91eac3a8f8 100644 --- a/src/celeritas/random/XorwowRngParams.cc +++ b/src/celeritas/random/XorwowRngParams.cc @@ -23,9 +23,62 @@ XorwowRngParams::XorwowRngParams(unsigned int seed) { HostVal host_data; host_data.seed = {seed}; + host_data.jump = this->get_jump_poly(); + host_data.jump_subsequence = this->get_jump_subsequence_poly(); CELER_ASSERT(host_data); data_ = CollectionMirror{std::move(host_data)}; } +//---------------------------------------------------------------------------// +/*! + * Get the jump polynomials. + * + * The jump polynomials (as well as the jump matrices) are calculated using + * https://github.com/celeritas-project/utils/blob/main/prng/xorwow-jump.py. + * + * The coefficients of the polynomial are packed into a 160-bit integer, which + * is then split into 5 32-bit integers, ordered from the lower 32 bits to the + * upper 32 bits. + * + * The jump sizes are 4^i for i = [0, 10). + */ +auto XorwowRngParams::get_jump_poly() -> ArrayJumpPoly const& +{ + static ArrayJumpPoly const jump + = {{{0x2, 0x0, 0x0, 0x0, 0x0}, + {0x10, 0x0, 0x0, 0x0, 0x0}, + {0x10000, 0x0, 0x0, 0x0, 0x0}, + {0x0, 0x0, 0x1, 0x0, 0x0}, + {0xbebd3534, 0x7064f5bc, 0x20be29eb, 0x536d5b32, 0x63a0069}, + {0xed64ec08, 0xafc48684, 0xd81c59ee, 0x1640314f, 0x2bf0ccef}, + {0x9d10e028, 0xee56d79c, 0xfb1b3286, 0xf747418, 0x26f9476d}, + {0x3f490634, 0x9ae593fc, 0x1a95bb6b, 0xda10a3fc, 0xa3abaf54}, + {0xfb9680e9, 0xbdaba0b2, 0x3986540f, 0x23fe6ccc, 0x994e82f}, + {0x32da6db4, 0x80135829, 0x3abd4734, 0x2060c3f9, 0x38b2dd97}}}; + return jump; +} + +//---------------------------------------------------------------------------// +/*! + * Get the jump polynomials for jumping over subsequences. + * + * The jump sizes are 4^i * 2^67 for i = [0, 10). + */ +auto XorwowRngParams::get_jump_subsequence_poly() -> ArrayJumpPoly const& +{ + static ArrayJumpPoly const jump_subsequence + = {{{0x26294934, 0x77bbc248, 0x1a87dad0, 0x930052d4, 0x947e6dd2}, + {0xa7474d19, 0x37c549e0, 0x140877d2, 0x24c43924, 0xcd52ebec}, + {0xfcc8f692, 0x35aa698a, 0x4ebbf85, 0x448304b0, 0x82f3fd5}, + {0xe502f5b3, 0x77859d31, 0x97e1cbb3, 0xea09047a, 0x61d5f37e}, + {0xb3a15db4, 0xc6df3330, 0x1a8be751, 0xf1e4b221, 0x6bb61c05}, + {0x19e2bee8, 0xf8974218, 0x4e65536a, 0xa2570336, 0xe9b88082}, + {0xe2cae2e, 0xd1011279, 0x58923768, 0x2bf650ba, 0xc985bcda}, + {0x146281e7, 0xa45b4452, 0xafa8c695, 0x74a0ac94, 0x4b250e0a}, + {0x89658c8b, 0xf3914315, 0xa73fe84b, 0x3c5fadb6, 0xadba8dd6}, + {0x82161afc, 0x9bb13c55, 0xfd20d7fb, 0x306d90b9, 0x92bf386f}}}; + return jump_subsequence; +} + //---------------------------------------------------------------------------// } // namespace celeritas diff --git a/src/celeritas/random/XorwowRngParams.hh b/src/celeritas/random/XorwowRngParams.hh index 9f147bcca0..9c20e267d9 100644 --- a/src/celeritas/random/XorwowRngParams.hh +++ b/src/celeritas/random/XorwowRngParams.hh @@ -8,6 +8,7 @@ #pragma once #include "corecel/Types.hh" +#include "corecel/cont/Array.hh" #include "corecel/data/CollectionMirror.hh" #include "corecel/data/ParamsDataInterface.hh" @@ -35,8 +36,20 @@ class XorwowRngParams final : public ParamsDataInterface DeviceRef const& device_ref() const final { return data_.device(); } private: + //// DATA //// + // Host/device storage and reference CollectionMirror data_; + + //// TYPES //// + + using JumpPoly = Array; + using ArrayJumpPoly = Array; + + //// HELPER FUNCTIONS //// + + ArrayJumpPoly const& get_jump_poly(); + ArrayJumpPoly const& get_jump_subsequence_poly(); }; //---------------------------------------------------------------------------// From 41236f4af335324d1f143d82a4ed4ca2dfd876f1 Mon Sep 17 00:00:00 2001 From: Amanda Lund Date: Tue, 5 Dec 2023 03:02:05 +0000 Subject: [PATCH 2/6] Construct both RNG engines with params --- app/demo-interactor/KNDemoKernel.cu | 4 +-- app/demo-interactor/KNDemoKernel.hh | 2 ++ src/celeritas/CMakeLists.txt | 1 + src/celeritas/global/CoreTrackView.hh | 2 +- src/celeritas/random/CuHipRngData.cc | 17 +++++++---- src/celeritas/random/CuHipRngData.hh | 25 +++------------- src/celeritas/random/CuHipRngEngine.hh | 18 +++++++---- src/celeritas/random/CuHipRngParams.cc | 30 +++++++++++++++++++ src/celeritas/random/CuHipRngParams.hh | 21 ++++--------- .../random/detail/CuHipRngStateInit.cc | 9 ++++-- .../random/detail/CuHipRngStateInit.cu | 19 +++++++----- .../random/detail/CuHipRngStateInit.hh | 11 ++++--- test/celeritas/geo/HeuristicGeoExecutor.hh | 2 +- test/celeritas/random/RngEngine.test.cc | 6 ++-- test/celeritas/random/RngEngine.test.cu | 30 ++++++++++++------- test/celeritas/random/RngEngine.test.hh | 11 +++---- test/celeritas/random/XorwowRngEngine.test.cc | 2 +- 17 files changed, 125 insertions(+), 85 deletions(-) create mode 100644 src/celeritas/random/CuHipRngParams.cc diff --git a/app/demo-interactor/KNDemoKernel.cu b/app/demo-interactor/KNDemoKernel.cu index f3a5d6638d..6d975f9622 100644 --- a/app/demo-interactor/KNDemoKernel.cu +++ b/app/demo-interactor/KNDemoKernel.cu @@ -73,7 +73,7 @@ __global__ void move_kernel(DeviceCRef const params, // Construct particle accessor from immutable and thread-local data ParticleTrackView particle( params.particle, states.particle, TrackSlotId(tid)); - RngEngine rng(states.rng, TrackSlotId(tid)); + RngEngine rng(params.rng, states.rng, TrackSlotId(tid)); // Move to collision XsCalculator calc_xs(params.tables.xs, params.tables.reals); @@ -109,7 +109,7 @@ __global__ void interact_kernel(DeviceCRef const params, // Construct particle accessor from immutable and thread-local data ParticleTrackView particle( params.particle, states.particle, TrackSlotId(tid)); - RngEngine rng(states.rng, TrackSlotId(tid)); + RngEngine rng(params.rng, states.rng, TrackSlotId(tid)); Detector detector(params.detector, states.detector); diff --git a/app/demo-interactor/KNDemoKernel.hh b/app/demo-interactor/KNDemoKernel.hh index df80d6eb5f..253462ecaa 100644 --- a/app/demo-interactor/KNDemoKernel.hh +++ b/app/demo-interactor/KNDemoKernel.hh @@ -66,6 +66,7 @@ template struct ParamsData { ParticleParamsData particle; + RngParamsData rng; TableData tables; KleinNishinaData kn_interactor; DetectorParamsData detector; @@ -81,6 +82,7 @@ struct ParamsData { CELER_EXPECT(other); particle = other.particle; + rng = other.rng; tables = other.tables; kn_interactor = other.kn_interactor; return *this; diff --git a/src/celeritas/CMakeLists.txt b/src/celeritas/CMakeLists.txt index e3d820bdf6..217d9db3fb 100644 --- a/src/celeritas/CMakeLists.txt +++ b/src/celeritas/CMakeLists.txt @@ -67,6 +67,7 @@ list(APPEND SOURCES phys/Process.cc phys/ProcessBuilder.cc random/CuHipRngData.cc + random/CuHipRngParams.cc random/XorwowRngData.cc random/XorwowRngParams.cc track/SimParams.cc diff --git a/src/celeritas/global/CoreTrackView.hh b/src/celeritas/global/CoreTrackView.hh index efd58fa046..0a903d0227 100644 --- a/src/celeritas/global/CoreTrackView.hh +++ b/src/celeritas/global/CoreTrackView.hh @@ -213,7 +213,7 @@ CELER_FUNCTION auto CoreTrackView::make_physics_step_view() const */ CELER_FUNCTION auto CoreTrackView::make_rng_engine() const -> RngEngine { - return RngEngine{states_.rng, this->track_slot_id()}; + return RngEngine{params_.rng, states_.rng, this->track_slot_id()}; } //---------------------------------------------------------------------------// diff --git a/src/celeritas/random/CuHipRngData.cc b/src/celeritas/random/CuHipRngData.cc index fc87bc6af9..37a4390ad4 100644 --- a/src/celeritas/random/CuHipRngData.cc +++ b/src/celeritas/random/CuHipRngData.cc @@ -31,25 +31,30 @@ void resize(CuHipRngStateData* state, CELER_EXPECT(size > 0); CELER_EXPECT(M == MemSpace::host || celeritas::device()); - using RngInit = CuHipRngInitializer; - // Host-side RNG for creating seeds std::mt19937 host_rng(params.seed + stream.get()); std::uniform_int_distribution sample_uniform_int; // Create seeds for device in host memory - StateCollection host_seeds; + StateCollection host_seeds; resize(&host_seeds, size); - for (RngInit& init : host_seeds[AllItems{}]) + for (auto& seed : host_seeds[AllItems{}]) { - init.seed = sample_uniform_int(host_rng); + seed = sample_uniform_int(host_rng); } + // The params are needed in the correct memspace to initialize the engine + HostVal host_data; + host_data.seed = params.seed; + CuHipRngParamsData data; + data = host_data; + // Resize state data and assign resize(&state->rng, size); detail::CuHipRngInitData init_data; init_data.seeds = host_seeds; - detail::rng_state_init(make_ref(*state), make_const_ref(init_data)); + detail::rng_state_init( + make_const_ref(data), make_ref(*state), make_const_ref(init_data)); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/random/CuHipRngData.hh b/src/celeritas/random/CuHipRngData.hh index be9df7cb6f..3c31a0fe83 100644 --- a/src/celeritas/random/CuHipRngData.hh +++ b/src/celeritas/random/CuHipRngData.hh @@ -59,28 +59,9 @@ using CuHipRngThreadState = CELER_RNG_PREFIX(randState_t); //---------------------------------------------------------------------------// /*! * Properties of the global random number generator. - * - * There is no persistent data needed on device or at runtime: the params are - * only used for construction. */ template -struct CuHipRngParamsData; - -template -struct CuHipRngParamsData -{ - /* no data on device */ - - //! Assign from another set of data - template - CuHipRngParamsData& operator=(CuHipRngParamsData const&) - { - return *this; - } -}; - -template -struct CuHipRngParamsData +struct CuHipRngParamsData { //// DATA //// @@ -106,7 +87,9 @@ struct CuHipRngParamsData */ struct CuHipRngInitializer { - ull_int seed; + ull_int seed{0}; + ull_int subsequence{0}; + ull_int offset{0}; }; //---------------------------------------------------------------------------// diff --git a/src/celeritas/random/CuHipRngEngine.hh b/src/celeritas/random/CuHipRngEngine.hh index d3a21dc9ad..c261f4b201 100644 --- a/src/celeritas/random/CuHipRngEngine.hh +++ b/src/celeritas/random/CuHipRngEngine.hh @@ -31,13 +31,15 @@ class CuHipRngEngine //! \name Type aliases using result_type = unsigned int; using Initializer_t = CuHipRngInitializer; + using ParamsRef = NativeCRef; using StateRef = NativeRef; //!@} public: // Construct from state - inline CELER_FUNCTION - CuHipRngEngine(StateRef const& state, TrackSlotId const& id); + inline CELER_FUNCTION CuHipRngEngine(ParamsRef const& params, + StateRef const& state, + TrackSlotId tid); // Initialize state from seed inline CELER_FUNCTION CuHipRngEngine& operator=(Initializer_t const& s); @@ -46,6 +48,7 @@ class CuHipRngEngine inline CELER_FUNCTION result_type operator()(); private: + ParamsRef const& params_; CuHipRngThreadState* state_; template @@ -97,10 +100,13 @@ class GenerateCanonical * Construct from state. */ CELER_FUNCTION -CuHipRngEngine::CuHipRngEngine(StateRef const& state, TrackSlotId const& id) +CuHipRngEngine::CuHipRngEngine(ParamsRef const& params, + StateRef const& state, + TrackSlotId tid) + : params_(params) { - CELER_EXPECT(id < state.rng.size()); - state_ = &state.rng[id]; + CELER_EXPECT(tid < state.rng.size()); + state_ = &state.rng[tid]; } //---------------------------------------------------------------------------// @@ -109,7 +115,7 @@ CuHipRngEngine::CuHipRngEngine(StateRef const& state, TrackSlotId const& id) */ CELER_FUNCTION CuHipRngEngine& CuHipRngEngine::operator=(Initializer_t const& s) { - CELER_RNG_PREFIX(rand_init)(s.seed, 0, 0, state_); + CELER_RNG_PREFIX(rand_init)(s.seed, s.subsequence, s.offset, state_); return *this; } diff --git a/src/celeritas/random/CuHipRngParams.cc b/src/celeritas/random/CuHipRngParams.cc new file mode 100644 index 0000000000..030cf77cb9 --- /dev/null +++ b/src/celeritas/random/CuHipRngParams.cc @@ -0,0 +1,30 @@ +//----------------------------------*-C++-*----------------------------------// +// Copyright 2023 UT-Battelle, LLC, and other Celeritas developers. +// See the top-level COPYRIGHT file for details. +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +//---------------------------------------------------------------------------// +//! \file celeritas/random/CuHipRngParams.cc +//---------------------------------------------------------------------------// +#include "CuHipRngParams.hh" + +#include + +#include "corecel/Assert.hh" +#include "celeritas/random/CuHipRngData.hh" + +namespace celeritas +{ +//---------------------------------------------------------------------------// +/*! + * Construct with a seed. + */ +CuHipRngParams::CuHipRngParams(unsigned int seed) +{ + HostVal host_data; + host_data.seed = seed; + CELER_ASSERT(host_data); + data_ = CollectionMirror{std::move(host_data)}; +} + +//---------------------------------------------------------------------------// +} // namespace celeritas diff --git a/src/celeritas/random/CuHipRngParams.hh b/src/celeritas/random/CuHipRngParams.hh index 499044f07f..d7a569af83 100644 --- a/src/celeritas/random/CuHipRngParams.hh +++ b/src/celeritas/random/CuHipRngParams.hh @@ -7,6 +7,7 @@ //---------------------------------------------------------------------------// #pragma once +#include "corecel/data/CollectionMirror.hh" #include "corecel/data/ParamsDataInterface.hh" #include "CuHipRngData.hh" @@ -24,28 +25,18 @@ class CuHipRngParams final : public ParamsDataInterface { public: // Construct with seed - explicit inline CuHipRngParams(unsigned int seed); + explicit CuHipRngParams(unsigned int seed); //! Access RNG properties for constructing RNG state - HostRef const& host_ref() const final { return host_ref_; } + HostRef const& host_ref() const final { return data_.host(); } //! Access data on device - DeviceRef const& device_ref() const final { return device_ref_; } + DeviceRef const& device_ref() const final { return data_.device(); } private: - HostRef host_ref_; - DeviceRef device_ref_; + // Host/device storage and reference + CollectionMirror data_; }; //---------------------------------------------------------------------------// -// INLINE DEFINITIONS -//---------------------------------------------------------------------------// -/*! - * Construct with seed. - */ -CuHipRngParams::CuHipRngParams(unsigned int seed) -{ - host_ref_.seed = seed; -} - } // namespace celeritas diff --git a/src/celeritas/random/detail/CuHipRngStateInit.cc b/src/celeritas/random/detail/CuHipRngStateInit.cc index 4aed420201..3a0ca49369 100644 --- a/src/celeritas/random/detail/CuHipRngStateInit.cc +++ b/src/celeritas/random/detail/CuHipRngStateInit.cc @@ -21,13 +21,16 @@ namespace detail /*! * Initialize the RNG states from seeds randomly generated on host. */ -void rng_state_init(HostRef const& rng, +void rng_state_init(HostCRef const& params, + HostRef const& state, HostCRef const& seeds) { for (auto tid : range(TrackSlotId{seeds.size()})) { - CuHipRngEngine engine(rng, tid); - engine = seeds.seeds[tid]; + CuHipRngInitializer init; + init.seed = seeds.seeds[tid]; + CuHipRngEngine engine(params, state, tid); + engine = init; } } diff --git a/src/celeritas/random/detail/CuHipRngStateInit.cu b/src/celeritas/random/detail/CuHipRngStateInit.cu index 35ef3623e8..e0cc65d853 100644 --- a/src/celeritas/random/detail/CuHipRngStateInit.cu +++ b/src/celeritas/random/detail/CuHipRngStateInit.cu @@ -26,16 +26,20 @@ namespace /*! * Initialize the RNG states on device from seeds randomly generated on host. */ -__global__ void rng_state_init_kernel(DeviceRef const state, - DeviceCRef const init) +__global__ void +rng_state_init_kernel(DeviceCRef const params, + DeviceRef const state, + DeviceCRef const seeds) { auto tid = TrackSlotId{ celeritas::KernelParamCalculator::thread_id().unchecked_get()}; if (tid.get() < state.size()) { TrackSlotId tsid{tid.unchecked_get()}; - CuHipRngEngine rng(state, tsid); - rng = init.seeds[tsid]; + CuHipRngInitializer init; + init.seed = seeds.seeds[tsid]; + CuHipRngEngine rng(params, state, tsid); + rng = init; } } @@ -48,11 +52,12 @@ __global__ void rng_state_init_kernel(DeviceRef const state, /*! * Initialize the RNG states on device from seeds randomly generated on host. */ -void rng_state_init(DeviceRef const& rng, +void rng_state_init(DeviceCRef const& params, + DeviceRef const& state, DeviceCRef const& seeds) { - CELER_EXPECT(rng.size() == seeds.size()); - CELER_LAUNCH_KERNEL(rng_state_init, seeds.size(), 0, rng, seeds); + CELER_EXPECT(state.size() == seeds.size()); + CELER_LAUNCH_KERNEL(rng_state_init, seeds.size(), 0, params, state, seeds); } //---------------------------------------------------------------------------// diff --git a/src/celeritas/random/detail/CuHipRngStateInit.hh b/src/celeritas/random/detail/CuHipRngStateInit.hh index b261aa2185..f667931e30 100644 --- a/src/celeritas/random/detail/CuHipRngStateInit.hh +++ b/src/celeritas/random/detail/CuHipRngStateInit.hh @@ -22,7 +22,7 @@ namespace detail template struct CuHipRngInitData { - StateCollection seeds; + StateCollection seeds; //// METHODS //// @@ -44,10 +44,12 @@ struct CuHipRngInitData //---------------------------------------------------------------------------// // Initialize the RNG state on host/device -void rng_state_init(DeviceRef const& rng, +void rng_state_init(DeviceCRef const& params, + DeviceRef const& state, DeviceCRef const& seeds); -void rng_state_init(HostRef const& rng, +void rng_state_init(HostCRef const& params, + HostRef const& state, HostCRef const& seeds); #if !CELER_USE_DEVICE @@ -55,7 +57,8 @@ void rng_state_init(HostRef const& rng, /*! * Initialize the RNG states on device from seeds randomly generated on host. */ -inline void rng_state_init(DeviceRef const&, +inline void rng_state_init(DeviceCRef const&, + DeviceRef const&, DeviceCRef const&) { CELER_ASSERT_UNREACHABLE(); diff --git a/test/celeritas/geo/HeuristicGeoExecutor.hh b/test/celeritas/geo/HeuristicGeoExecutor.hh index 0781c368e4..2362a9986b 100644 --- a/test/celeritas/geo/HeuristicGeoExecutor.hh +++ b/test/celeritas/geo/HeuristicGeoExecutor.hh @@ -52,7 +52,7 @@ struct HeuristicGeoExecutor */ CELER_FUNCTION void HeuristicGeoExecutor::operator()(TrackSlotId tid) const { - RngEngine rng(state.rng, tid); + RngEngine rng(params.rng, state.rng, tid); GeoTrackView geo(params.geometry, state.geometry, tid); if (state.status[tid] == LifeStatus::unborn) { diff --git a/test/celeritas/random/RngEngine.test.cc b/test/celeritas/random/RngEngine.test.cc index 06b4e581a2..261a358cdd 100644 --- a/test/celeritas/random/RngEngine.test.cc +++ b/test/celeritas/random/RngEngine.test.cc @@ -177,7 +177,8 @@ TEST_F(DeviceRngEngineTest, TEST_IF_CELER_DEVICE(device)) RngDeviceStore rng_store(params->host_ref(), StreamId{0}, 1024); // Generate on device - std::vector values = re_test_native(rng_store.ref()); + std::vector values + = re_test_native(params->device_ref(), rng_store.ref()); // Print a subset of the values std::vector test_values; @@ -282,7 +283,8 @@ TYPED_TEST(DeviceRngEngineFloatTest, DISABLED_device) RngDeviceStore rng_store(this->params->host_ref(), StreamId{0}, 100); // Generate on device - auto values = re_test_canonical(rng_store.ref()); + auto values = re_test_canonical(this->params->device_ref(), + rng_store.ref()); // Test result EXPECT_EQ(rng_store.size(), values.size()); diff --git a/test/celeritas/random/RngEngine.test.cu b/test/celeritas/random/RngEngine.test.cu index 1260ea03e3..8c33b2403b 100644 --- a/test/celeritas/random/RngEngine.test.cu +++ b/test/celeritas/random/RngEngine.test.cu @@ -29,25 +29,27 @@ namespace // KERNELS //---------------------------------------------------------------------------// -__global__ void sample_native_kernel(DeviceRef view, +__global__ void sample_native_kernel(RngDeviceParamsRef params, + RngDeviceStateRef states, RngEngine::result_type* samples) { auto tid = TrackSlotId{KernelParamCalculator::thread_id().unchecked_get()}; - if (tid.get() < view.size()) + if (tid.get() < states.size()) { - RngEngine rng(view, tid); + RngEngine rng(params, states, tid); samples[tid.get()] = rng(); } } template -__global__ void -sample_canonical_kernel(DeviceRef view, RealType* samples) +__global__ void sample_canonical_kernel(RngDeviceParamsRef params, + RngDeviceStateRef states, + RealType* samples) { auto tid = TrackSlotId{KernelParamCalculator::thread_id().unchecked_get()}; - if (tid.get() < view.size()) + if (tid.get() < states.size()) { - RngEngine rng(view, tid); + RngEngine rng(params, states, tid); samples[tid.get()] = generate_canonical(rng); } } @@ -57,13 +59,15 @@ sample_canonical_kernel(DeviceRef view, RealType* samples) // TESTING INTERFACE //---------------------------------------------------------------------------// //! Run on device and return results -std::vector re_test_native(RngDeviceRef states) +std::vector +re_test_native(RngDeviceParamsRef params, RngDeviceStateRef states) { thrust::device_vector samples(states.size()); CELER_LAUNCH_KERNEL(sample_native, states.size(), 0, + params, states, raw_pointer_cast(samples.data())); @@ -76,7 +80,8 @@ std::vector re_test_native(RngDeviceRef states) //---------------------------------------------------------------------------// //! Run on device and return results template -std::vector re_test_canonical(RngDeviceRef states) +std::vector +re_test_canonical(RngDeviceParamsRef params, RngDeviceStateRef states) { thrust::device_vector samples(states.size()); @@ -89,6 +94,7 @@ std::vector re_test_canonical(RngDeviceRef states) grid.threads_per_block, 0, 0, + params, states, raw_pointer_cast(samples.data())); CELER_DEVICE_CHECK_ERROR(); @@ -104,8 +110,10 @@ std::vector re_test_canonical(RngDeviceRef states) // EXPLICIT INSTANTIATION //---------------------------------------------------------------------------// -template std::vector re_test_canonical(RngDeviceRef); -template std::vector re_test_canonical(RngDeviceRef); +template std::vector + re_test_canonical(RngDeviceParamsRef, RngDeviceStateRef); +template std::vector + re_test_canonical(RngDeviceParamsRef, RngDeviceStateRef); //---------------------------------------------------------------------------// } // namespace test diff --git a/test/celeritas/random/RngEngine.test.hh b/test/celeritas/random/RngEngine.test.hh index 9548a13342..b1cb55dedc 100644 --- a/test/celeritas/random/RngEngine.test.hh +++ b/test/celeritas/random/RngEngine.test.hh @@ -21,23 +21,24 @@ namespace test // TESTING INTERFACE //---------------------------------------------------------------------------// //! Input data -using RngDeviceRef = DeviceRef; +using RngDeviceParamsRef = DeviceCRef; +using RngDeviceStateRef = DeviceRef; //---------------------------------------------------------------------------// //! Run on device and return results -std::vector re_test_native(RngDeviceRef); +std::vector re_test_native(RngDeviceParamsRef, RngDeviceStateRef); template -std::vector re_test_canonical(RngDeviceRef); +std::vector re_test_canonical(RngDeviceParamsRef, RngDeviceStateRef); #if !CELER_USE_DEVICE -std::vector re_test_native(RngDeviceRef) +std::vector re_test_native(RngDeviceParamsRef, RngDeviceStateRef) { CELER_NOT_CONFIGURED("CUDA or HIP"); } template -inline std::vector re_test_canonical(RngDeviceRef) +inline std::vector re_test_canonical(RngDeviceParamsRef, RngDeviceStateRef) { CELER_NOT_CONFIGURED("CUDA or HIP"); } diff --git a/test/celeritas/random/XorwowRngEngine.test.cc b/test/celeritas/random/XorwowRngEngine.test.cc index 51dfda9b4a..9969b9b882 100644 --- a/test/celeritas/random/XorwowRngEngine.test.cc +++ b/test/celeritas/random/XorwowRngEngine.test.cc @@ -196,7 +196,7 @@ TEST_F(XorwowRngEngineTest, moments) for (unsigned int i = 0; i < num_seeds; ++i) { - XorwowRngEngine rng(states.ref(), TrackSlotId{i}); + XorwowRngEngine rng(params->host_ref(), states.ref(), TrackSlotId{i}); for (unsigned int j = 0; j < num_samples; ++j) { tally(generate_canonical(rng)); From 813f5c0d3e86ef7b54aefc1a24e56d10e5246853 Mon Sep 17 00:00:00 2001 From: Amanda Lund Date: Tue, 5 Dec 2023 03:52:26 +0000 Subject: [PATCH 3/6] Update formatting and doc, fix Weyl sequence, fix params assignment --- src/celeritas/random/CuHipRngEngine.hh | 2 +- src/celeritas/random/XorwowRngData.hh | 5 +- src/celeritas/random/XorwowRngEngine.hh | 88 +++++++++++++------------ src/celeritas/random/XorwowRngParams.cc | 48 +++++++------- 4 files changed, 74 insertions(+), 69 deletions(-) diff --git a/src/celeritas/random/CuHipRngEngine.hh b/src/celeritas/random/CuHipRngEngine.hh index c261f4b201..eea31ddf16 100644 --- a/src/celeritas/random/CuHipRngEngine.hh +++ b/src/celeritas/random/CuHipRngEngine.hh @@ -42,7 +42,7 @@ class CuHipRngEngine TrackSlotId tid); // Initialize state from seed - inline CELER_FUNCTION CuHipRngEngine& operator=(Initializer_t const& s); + inline CELER_FUNCTION CuHipRngEngine& operator=(Initializer_t const&); // Sample a random number inline CELER_FUNCTION result_type operator()(); diff --git a/src/celeritas/random/XorwowRngData.hh b/src/celeritas/random/XorwowRngData.hh index 6e83e487bd..e31572268a 100644 --- a/src/celeritas/random/XorwowRngData.hh +++ b/src/celeritas/random/XorwowRngData.hh @@ -18,9 +18,6 @@ namespace celeritas //---------------------------------------------------------------------------// /*! * Persistent data for XORWOW generator. - * - * If we want to add the "discard" operation or support initialization with a - * subsequence or offset, we can add the precomputed XORWOW jump matrices here. */ template struct XorwowRngParamsData @@ -55,6 +52,8 @@ struct XorwowRngParamsData { CELER_EXPECT(other); seed = other.seed; + jump = other.jump; + jump_subsequence = other.jump_subsequence; return *this; } }; diff --git a/src/celeritas/random/XorwowRngEngine.hh b/src/celeritas/random/XorwowRngEngine.hh index 37aaadac30..005c749df4 100644 --- a/src/celeritas/random/XorwowRngEngine.hh +++ b/src/celeritas/random/XorwowRngEngine.hh @@ -38,7 +38,16 @@ namespace celeritas * representation of the recurrance, see: Haramoto, H., Matsumoto, M., * Nishimura, T., Panneton, F., L’Ecuyer, P. 2008. "Efficient jump ahead for * F2-linear random number generators". INFORMS Journal on Computing. - * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251 + * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251. + * + * The jump polynomials were precomputed using + * https://github.com/celeritas-project/utils/blob/main/prng/xorwow-jump.py. + * For a more detailed description of how to calculate the jump polynomials + * using Knuth's square-and-multiply algorithm in O(k^2 log d) time (where k is + * the number of bits in the state and d is the number of steps to skip ahead), + * see: Collins, J. 2008. "Testing, Selection, and Implementation of Random + * Number Generators". ARL-TR-4498. + * https://apps.dtic.mil/sti/pdfs/ADA486637.pdf. */ class XorwowRngEngine { @@ -69,14 +78,11 @@ class XorwowRngEngine // Generate a 32-bit pseudorandom number inline CELER_FUNCTION result_type operator()(); - // Apply the transformation to the state - inline CELER_FUNCTION void next(); - // Jump ahead \c n steps - inline CELER_FUNCTION void discard(ull_int n); + inline CELER_FUNCTION void skipahead(ull_int n); // Jump ahead \c n subsequences (\c n * 2^67 steps) - inline CELER_FUNCTION void discard_subsequence(ull_int n); + inline CELER_FUNCTION void skipahead_subsequence(ull_int n); private: /// TYPES /// @@ -91,6 +97,7 @@ class XorwowRngEngine //// HELPER FUNCTIONS //// + inline CELER_FUNCTION void next(); inline CELER_FUNCTION void jump(ull_int, ArrayJumpPoly const&); inline CELER_FUNCTION void jump(JumpPoly const&); @@ -171,8 +178,8 @@ XorwowRngEngine::operator=(Initializer_t const& init) state_->weylstate = static_cast(seed >> 32); // Skip ahead - this->discard_subsequence(init.subsequence); - this->discard(init.offset); + this->skipahead_subsequence(init.subsequence); + this->skipahead(init.offset); return *this; } @@ -184,32 +191,15 @@ XorwowRngEngine::operator=(Initializer_t const& init) CELER_FUNCTION auto XorwowRngEngine::operator()() -> result_type { this->next(); - return state_->weylstate + state_->xorstate[4]; -} - -//---------------------------------------------------------------------------// -/*! - * Apply the transformation to the state. - */ -CELER_FUNCTION void XorwowRngEngine::next() -{ - auto& s = state_->xorstate; - auto const t = (s[0] ^ (s[0] >> 2u)); - - s[0] = s[1]; - s[1] = s[2]; - s[2] = s[3]; - s[3] = s[4]; - s[4] = (s[4] ^ (s[4] << 4u)) ^ (t ^ (t << 1u)); - state_->weylstate += 362437u; + return state_->weylstate + state_->xorstate[4]; } //---------------------------------------------------------------------------// /*! * Advance the state \c n steps. */ -CELER_FUNCTION void XorwowRngEngine::discard(ull_int n) +CELER_FUNCTION void XorwowRngEngine::skipahead(ull_int n) { this->jump(n, params_.jump); state_->weylstate += static_cast(n) * 362437u; @@ -222,11 +212,29 @@ CELER_FUNCTION void XorwowRngEngine::discard(ull_int n) * Note that the Weyl sequence value remains the same since it has period 2^32 * which divides evenly into 2^67. */ -CELER_FUNCTION void XorwowRngEngine::discard_subsequence(ull_int n) +CELER_FUNCTION void XorwowRngEngine::skipahead_subsequence(ull_int n) { this->jump(n, params_.jump_subsequence); } +//---------------------------------------------------------------------------// +/*! + * Apply the transformation to the state. + * + * This does not update the Weyl sequence value. + */ +CELER_FUNCTION void XorwowRngEngine::next() +{ + auto& s = state_->xorstate; + auto const t = (s[0] ^ (s[0] >> 2u)); + + s[0] = s[1]; + s[1] = s[2]; + s[2] = s[3]; + s[3] = s[4]; + s[4] = (s[4] ^ (s[4] << 4u)) ^ (t ^ (t << 1u)); +} + //---------------------------------------------------------------------------// /*! * Jump ahead \c n steps or subsequences. @@ -242,16 +250,17 @@ XorwowRngEngine::jump(ull_int n, ArrayJumpPoly const& jump_poly_arr) constexpr size_type max_num_jump = 3; // Start with the smallest jump (either one step or one subsequence) - size_type jp_idx = 0; + size_type jump_idx = 0; while (n > 0) { // Number of times to apply this jump polynomial uint_t num_jump = static_cast(n) & max_num_jump; for (size_type i = 0; i < num_jump; ++i) { - this->jump(jump_poly_arr[jp_idx]); + CELER_ASSERT(jump_idx < jump_poly_arr.size()); + this->jump(jump_poly_arr[jump_idx]); } - ++jp_idx; + ++jump_idx; n >>= 2; } } @@ -261,7 +270,7 @@ XorwowRngEngine::jump(ull_int n, ArrayJumpPoly const& jump_poly_arr) * Jump ahead using the given jump polynomial. * * This uses the polynomial representation to apply the recurrance to the - * state. The theory is described in + * state. The theory is described in * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251. Let * \f[ g(z) = z^d \mod p(z) = a_1 z^{k-1} + ... + a_{k-1} z + a_k, @@ -269,7 +278,7 @@ XorwowRngEngine::jump(ull_int n, ArrayJumpPoly const& jump_poly_arr) * where \f$ p(z) = det(zI + T) \f$ is the characteristic polynomial and \f$ T * \f$ is the transformation matrix. Observing that \f$ g(z) = z^d q(z)p(z) \f$ * for some polynomial \f$ q(z) \f$ and that \f$ p(T) = 0 \f$ (a fundamental - * property of the characteristic polynomial, it follows that + * property of the characteristic polynomial), it follows that * \f[ g(T) = T^d = a_1 A^{k-1} + ... + a_{k-1} A + a_k I. * \f] @@ -281,22 +290,19 @@ XorwowRngEngine::jump(ull_int n, ArrayJumpPoly const& jump_poly_arr) * \f] * Note that applying \f$ T \f$ to \f$ x \f$ is equivalent to calling \c * next(), and that in \f$ F_2 \f$, the finite field with two elements, - * addition is the same as subtraction and equivalent to bitwise exclusive or. - * Multiplication is bitwise and. + * addition is the same as subtraction and equivalent to bitwise exclusive or, + * and multiplication is bitwise and. */ CELER_FUNCTION void XorwowRngEngine::jump(JumpPoly const& jump_poly) { - constexpr size_type num_bits = 32; - constexpr size_type num_words = 5; - Array s = {0}; - for (size_type i : range(num_words)) + for (size_type i : range(params_.num_words())) { - for (size_type j : range(num_bits)) + for (size_type j : range(params_.num_bits())) { if (jump_poly[i] & (1 << j)) { - for (size_type k : range(num_words)) + for (size_type k : range(params_.num_words())) { s[k] ^= state_->xorstate[k]; } diff --git a/src/celeritas/random/XorwowRngParams.cc b/src/celeritas/random/XorwowRngParams.cc index 91eac3a8f8..750f50cf34 100644 --- a/src/celeritas/random/XorwowRngParams.cc +++ b/src/celeritas/random/XorwowRngParams.cc @@ -33,7 +33,7 @@ XorwowRngParams::XorwowRngParams(unsigned int seed) /*! * Get the jump polynomials. * - * The jump polynomials (as well as the jump matrices) are calculated using + * The jump polynomials (and the jump matrices) can be generated using * https://github.com/celeritas-project/utils/blob/main/prng/xorwow-jump.py. * * The coefficients of the polynomial are packed into a 160-bit integer, which @@ -44,17 +44,17 @@ XorwowRngParams::XorwowRngParams(unsigned int seed) */ auto XorwowRngParams::get_jump_poly() -> ArrayJumpPoly const& { - static ArrayJumpPoly const jump - = {{{0x2, 0x0, 0x0, 0x0, 0x0}, - {0x10, 0x0, 0x0, 0x0, 0x0}, - {0x10000, 0x0, 0x0, 0x0, 0x0}, - {0x0, 0x0, 0x1, 0x0, 0x0}, - {0xbebd3534, 0x7064f5bc, 0x20be29eb, 0x536d5b32, 0x63a0069}, - {0xed64ec08, 0xafc48684, 0xd81c59ee, 0x1640314f, 0x2bf0ccef}, - {0x9d10e028, 0xee56d79c, 0xfb1b3286, 0xf747418, 0x26f9476d}, - {0x3f490634, 0x9ae593fc, 0x1a95bb6b, 0xda10a3fc, 0xa3abaf54}, - {0xfb9680e9, 0xbdaba0b2, 0x3986540f, 0x23fe6ccc, 0x994e82f}, - {0x32da6db4, 0x80135829, 0x3abd4734, 0x2060c3f9, 0x38b2dd97}}}; + static ArrayJumpPoly const jump = { + {{0x00000002u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u}, + {0x00000010u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u}, + {0x00010000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u}, + {0x00000000u, 0x00000000u, 0x00000001u, 0x00000000u, 0x00000000u}, + {0xbebd3534u, 0x7064f5bcu, 0x20be29ebu, 0x536d5b32u, 0x063a0069u}, + {0xed64ec08u, 0xafc48684u, 0xd81c59eeu, 0x1640314fu, 0x2bf0ccefu}, + {0x9d10e028u, 0xee56d79cu, 0xfb1b3286u, 0x0f747418u, 0x26f9476du}, + {0x3f490634u, 0x9ae593fcu, 0x1a95bb6bu, 0xda10a3fcu, 0xa3abaf54u}, + {0xfb9680e9u, 0xbdaba0b2u, 0x3986540fu, 0x23fe6cccu, 0x0994e82fu}, + {0x32da6db4u, 0x80135829u, 0x3abd4734u, 0x2060c3f9u, 0x38b2dd97u}}}; return jump; } @@ -62,21 +62,21 @@ auto XorwowRngParams::get_jump_poly() -> ArrayJumpPoly const& /*! * Get the jump polynomials for jumping over subsequences. * - * The jump sizes are 4^i * 2^67 for i = [0, 10). + * A subsequence is 2^67 steps. The jump sizes are 4^i * 2^67 for i = [0, 10). */ auto XorwowRngParams::get_jump_subsequence_poly() -> ArrayJumpPoly const& { - static ArrayJumpPoly const jump_subsequence - = {{{0x26294934, 0x77bbc248, 0x1a87dad0, 0x930052d4, 0x947e6dd2}, - {0xa7474d19, 0x37c549e0, 0x140877d2, 0x24c43924, 0xcd52ebec}, - {0xfcc8f692, 0x35aa698a, 0x4ebbf85, 0x448304b0, 0x82f3fd5}, - {0xe502f5b3, 0x77859d31, 0x97e1cbb3, 0xea09047a, 0x61d5f37e}, - {0xb3a15db4, 0xc6df3330, 0x1a8be751, 0xf1e4b221, 0x6bb61c05}, - {0x19e2bee8, 0xf8974218, 0x4e65536a, 0xa2570336, 0xe9b88082}, - {0xe2cae2e, 0xd1011279, 0x58923768, 0x2bf650ba, 0xc985bcda}, - {0x146281e7, 0xa45b4452, 0xafa8c695, 0x74a0ac94, 0x4b250e0a}, - {0x89658c8b, 0xf3914315, 0xa73fe84b, 0x3c5fadb6, 0xadba8dd6}, - {0x82161afc, 0x9bb13c55, 0xfd20d7fb, 0x306d90b9, 0x92bf386f}}}; + static ArrayJumpPoly const jump_subsequence = { + {{0x26294934u, 0x77bbc248u, 0x1a87dad0u, 0x930052d4u, 0x947e6dd2u}, + {0xa7474d19u, 0x37c549e0u, 0x140877d2u, 0x24c43924u, 0xcd52ebecu}, + {0xfcc8f692u, 0x35aa698au, 0x04ebbf85u, 0x448304b0u, 0x082f3fd5u}, + {0xe502f5b3u, 0x77859d31u, 0x97e1cbb3u, 0xea09047au, 0x61d5f37eu}, + {0xb3a15db4u, 0xc6df3330u, 0x1a8be751u, 0xf1e4b221u, 0x6bb61c05u}, + {0x19e2bee8u, 0xf8974218u, 0x4e65536au, 0xa2570336u, 0xe9b88082u}, + {0x0e2cae2eu, 0xd1011279u, 0x58923768u, 0x2bf650bau, 0xc985bcdau}, + {0x146281e7u, 0xa45b4452u, 0xafa8c695u, 0x74a0ac94u, 0x4b250e0au}, + {0x89658c8bu, 0xf3914315u, 0xa73fe84bu, 0x3c5fadb6u, 0xadba8dd6u}, + {0x82161afcu, 0x9bb13c55u, 0xfd20d7fbu, 0x306d90b9u, 0x92bf386fu}}}; return jump_subsequence; } From c6e8dfb0c58e4e5f7e39f63a2894d532765a7b49 Mon Sep 17 00:00:00 2001 From: Amanda Lund Date: Tue, 5 Dec 2023 15:25:27 +0000 Subject: [PATCH 4/6] Add tests and more jumps --- src/celeritas/random/CuHipRngData.cc | 2 +- src/celeritas/random/XorwowRngData.hh | 2 +- src/celeritas/random/XorwowRngEngine.hh | 28 +++++----- src/celeritas/random/XorwowRngParams.cc | 52 +++++++++++++++-- src/celeritas/random/XorwowRngParams.hh | 2 +- test/celeritas/random/XorwowRngEngine.test.cc | 56 +++++++++++++++++++ 6 files changed, 121 insertions(+), 21 deletions(-) diff --git a/src/celeritas/random/CuHipRngData.cc b/src/celeritas/random/CuHipRngData.cc index 37a4390ad4..a08c06e61e 100644 --- a/src/celeritas/random/CuHipRngData.cc +++ b/src/celeritas/random/CuHipRngData.cc @@ -43,7 +43,7 @@ void resize(CuHipRngStateData* state, seed = sample_uniform_int(host_rng); } - // The params are needed in the correct memspace to initialize the engine + // Set up params on device to initialize the engine HostVal host_data; host_data.seed = params.seed; CuHipRngParamsData data; diff --git a/src/celeritas/random/XorwowRngData.hh b/src/celeritas/random/XorwowRngData.hh index e31572268a..337cccb5ad 100644 --- a/src/celeritas/random/XorwowRngData.hh +++ b/src/celeritas/random/XorwowRngData.hh @@ -26,7 +26,7 @@ struct XorwowRngParamsData using uint_t = unsigned int; using JumpPoly = Array; - using ArrayJumpPoly = Array; + using ArrayJumpPoly = Array; //// DATA //// diff --git a/src/celeritas/random/XorwowRngEngine.hh b/src/celeritas/random/XorwowRngEngine.hh index 005c749df4..3e6266eeca 100644 --- a/src/celeritas/random/XorwowRngEngine.hh +++ b/src/celeritas/random/XorwowRngEngine.hh @@ -35,7 +35,7 @@ namespace celeritas * sequence (https://www.jstatsoft.org/index.php/jss/article/view/v008i14/916). * * For a description of the jump ahead method using the polynomial - * representation of the recurrance, see: Haramoto, H., Matsumoto, M., + * representation of the recurrence, see: Haramoto, H., Matsumoto, M., * Nishimura, T., Panneton, F., L’Ecuyer, P. 2008. "Efficient jump ahead for * F2-linear random number generators". INFORMS Journal on Computing. * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251. @@ -78,17 +78,17 @@ class XorwowRngEngine // Generate a 32-bit pseudorandom number inline CELER_FUNCTION result_type operator()(); - // Jump ahead \c n steps - inline CELER_FUNCTION void skipahead(ull_int n); + // Jump ahead \c num_skip steps + inline CELER_FUNCTION void skipahead(ull_int num_skip); - // Jump ahead \c n subsequences (\c n * 2^67 steps) - inline CELER_FUNCTION void skipahead_subsequence(ull_int n); + // Jump ahead \c num_skip subsequences (\c num_skip * 2^67 steps) + inline CELER_FUNCTION void skipahead_subsequence(ull_int num_skip); private: /// TYPES /// using JumpPoly = Array; - using ArrayJumpPoly = Array; + using ArrayJumpPoly = Array; /// DATA /// @@ -199,22 +199,22 @@ CELER_FUNCTION auto XorwowRngEngine::operator()() -> result_type /*! * Advance the state \c n steps. */ -CELER_FUNCTION void XorwowRngEngine::skipahead(ull_int n) +CELER_FUNCTION void XorwowRngEngine::skipahead(ull_int num_skip) { - this->jump(n, params_.jump); - state_->weylstate += static_cast(n) * 362437u; + this->jump(num_skip, params_.jump); + state_->weylstate += static_cast(num_skip) * 362437u; } //---------------------------------------------------------------------------// /*! - * Advance the state \c n subsequences (\c n * 2^67 steps). + * Advance the state \c num_skip subsequences (\c num_skip * 2^67 steps). * * Note that the Weyl sequence value remains the same since it has period 2^32 * which divides evenly into 2^67. */ -CELER_FUNCTION void XorwowRngEngine::skipahead_subsequence(ull_int n) +CELER_FUNCTION void XorwowRngEngine::skipahead_subsequence(ull_int num_skip) { - this->jump(n, params_.jump_subsequence); + this->jump(num_skip, params_.jump_subsequence); } //---------------------------------------------------------------------------// @@ -246,7 +246,7 @@ CELER_FUNCTION void XorwowRngEngine::jump(ull_int n, ArrayJumpPoly const& jump_poly_arr) { // Maximum number of times to apply any jump polynomial. Since the jump - // sizes are 4^i for i = [0, 10), the max is 3. + // sizes are 4^i for i = [0, 32), the max is 3. constexpr size_type max_num_jump = 3; // Start with the smallest jump (either one step or one subsequence) @@ -269,7 +269,7 @@ XorwowRngEngine::jump(ull_int n, ArrayJumpPoly const& jump_poly_arr) /*! * Jump ahead using the given jump polynomial. * - * This uses the polynomial representation to apply the recurrance to the + * This uses the polynomial representation to apply the recurrence to the * state. The theory is described in * https://pubsonline.informs.org/doi/10.1287/ijoc.1070.0251. Let * \f[ diff --git a/src/celeritas/random/XorwowRngParams.cc b/src/celeritas/random/XorwowRngParams.cc index 750f50cf34..7285e918ce 100644 --- a/src/celeritas/random/XorwowRngParams.cc +++ b/src/celeritas/random/XorwowRngParams.cc @@ -40,7 +40,7 @@ XorwowRngParams::XorwowRngParams(unsigned int seed) * is then split into 5 32-bit integers, ordered from the lower 32 bits to the * upper 32 bits. * - * The jump sizes are 4^i for i = [0, 10). + * The jump sizes are 4^i for i = [0, 32). */ auto XorwowRngParams::get_jump_poly() -> ArrayJumpPoly const& { @@ -54,7 +54,29 @@ auto XorwowRngParams::get_jump_poly() -> ArrayJumpPoly const& {0x9d10e028u, 0xee56d79cu, 0xfb1b3286u, 0x0f747418u, 0x26f9476du}, {0x3f490634u, 0x9ae593fcu, 0x1a95bb6bu, 0xda10a3fcu, 0xa3abaf54u}, {0xfb9680e9u, 0xbdaba0b2u, 0x3986540fu, 0x23fe6cccu, 0x0994e82fu}, - {0x32da6db4u, 0x80135829u, 0x3abd4734u, 0x2060c3f9u, 0x38b2dd97u}}}; + {0x32da6db4u, 0x80135829u, 0x3abd4734u, 0x2060c3f9u, 0x38b2dd97u}, + {0x0c5b636fu, 0x4407a814u, 0x60204515u, 0x5bd4dbd2u, 0x2509eeb5u}, + {0x21e9179du, 0x2b57aa94u, 0x5f06e1fcu, 0x6735dc98u, 0x9aa0181fu}, + {0x793adf2bu, 0x3d3e75c8u, 0x0b091758u, 0x9deb3f50u, 0xbcd116ecu}, + {0xfa8fc346u, 0x694921b1u, 0xf2bd4c48u, 0x8b05ae1bu, 0x2de7aee2u}, + {0xf144e1f3u, 0xb9b4b3c4u, 0x8222f622u, 0x9072105au, 0x66083550u}, + {0xbd734cf1u, 0x9254a905u, 0xfb38236du, 0x11e62fd3u, 0xf9e7d21eu}, + {0x943999e4u, 0xc05db913u, 0x4e4010f3u, 0x9b865d3du, 0x0fd64174u}, + {0x19eb1bbbu, 0xbaea3750u, 0x0fa8f206u, 0xd49dd019u, 0x9b3bccafu}, + {0xc97b5642u, 0x2ebac13du, 0xbfe04058u, 0x2c6a7132u, 0x576780a5u}, + {0x0ac5eea9u, 0xa37bfcd3u, 0x790ec91du, 0xba339dbcu, 0xc83db5cdu}, + {0xa33b53ffu, 0x1ce9360du, 0x4727f89bu, 0x05eacbcdu, 0x01632278u}, + {0x22b4f98cu, 0xd23a7f5au, 0x8d420eddu, 0xeadda806u, 0xcfd2a002u}, + {0xf66ad52bu, 0x3ab8e3d7u, 0x1e8352a4u, 0xe44a8605u, 0x6c106869u}, + {0x79a31c08u, 0xc28d5d18u, 0x91649708u, 0x8b1bb8f8u, 0xf9158a86u}, + {0x670f870eu, 0xa7bb9766u, 0xef013c78u, 0xeb4a1373u, 0x256f3323u}, + {0xf333af18u, 0x3b4e266bu, 0xa872663eu, 0x3888cd82u, 0x4daf13ecu}, + {0x75689dc9u, 0x036bf3a9u, 0x64ce979cu, 0x3bdff14fu, 0x51e43048u}, + {0xef06fe75u, 0xefd6d1d8u, 0x09319075u, 0x69d568f5u, 0x5b2bd898u}, + {0xf05ae255u, 0xc4df4ca0u, 0xf032420cu, 0xf44ae9f0u, 0xe0298de2u}, + {0x02308bc9u, 0xdbe74deeu, 0xf4c5fe7du, 0xaac571a1u, 0xaa1f5f8bu}, + {0x6e8043cau, 0x0ed24ff6u, 0x1e668b6au, 0x538fc45fu, 0x4bfb509du}, + {0x9f564f7bu, 0x543973e4u, 0x9b33ee2cu, 0x149df73au, 0x58f31585u}}}; return jump; } @@ -62,7 +84,7 @@ auto XorwowRngParams::get_jump_poly() -> ArrayJumpPoly const& /*! * Get the jump polynomials for jumping over subsequences. * - * A subsequence is 2^67 steps. The jump sizes are 4^i * 2^67 for i = [0, 10). + * A subsequence is 2^67 steps. The jump sizes are 4^i * 2^67 for i = [0, 32). */ auto XorwowRngParams::get_jump_subsequence_poly() -> ArrayJumpPoly const& { @@ -76,7 +98,29 @@ auto XorwowRngParams::get_jump_subsequence_poly() -> ArrayJumpPoly const& {0x0e2cae2eu, 0xd1011279u, 0x58923768u, 0x2bf650bau, 0xc985bcdau}, {0x146281e7u, 0xa45b4452u, 0xafa8c695u, 0x74a0ac94u, 0x4b250e0au}, {0x89658c8bu, 0xf3914315u, 0xa73fe84bu, 0x3c5fadb6u, 0xadba8dd6u}, - {0x82161afcu, 0x9bb13c55u, 0xfd20d7fbu, 0x306d90b9u, 0x92bf386fu}}}; + {0x82161afcu, 0x9bb13c55u, 0xfd20d7fbu, 0x306d90b9u, 0x92bf386fu}, + {0x3e9058c1u, 0x71da9705u, 0xe2cb1f2bu, 0x73456536u, 0xbd6501b6u}, + {0xb1321eb5u, 0xb06d01a2u, 0x51532012u, 0x8fb59962u, 0x141d3b0bu}, + {0xa0ffe9a4u, 0xf57be00eu, 0xe706880cu, 0x191211efu, 0xee5664fbu}, + {0xe129d45du, 0xac3e698bu, 0xd61f79fdu, 0x7a72a28bu, 0x7f4942beu}, + {0x874e26a7u, 0x33bc5a47u, 0xad95cba5u, 0x67651c39u, 0xf1f07dcau}, + {0x4b324c2eu, 0x2bdcc5a0u, 0x53eb4240u, 0xcac4fbc1u, 0x13e529e7u}, + {0x5fe4b704u, 0xd77445c3u, 0xb80eeb3cu, 0x6720fc6du, 0x7da33f71u}, + {0x27786b0du, 0x55a8b8bbu, 0xad73d087u, 0x548172a6u, 0xb8dcb607u}, + {0x1e9372ddu, 0xe081adccu, 0xf9650df2u, 0x0ad599e4u, 0x21aeba83u}, + {0x552ec26fu, 0x2663dad8u, 0x25bf8d5eu, 0x538f9e9bu, 0x804bfd4cu}, + {0xab750c90u, 0x454415efu, 0xd94a347cu, 0xd23c81beu, 0x551c7096u}, + {0xbc6d2665u, 0xc1fd8153u, 0xd43c38c9u, 0xd70344ccu, 0x279357c0u}, + {0x88aced61u, 0x3925e5e8u, 0xc8af3caau, 0xefa299b1u, 0xbc1538f8u}, + {0xc3051a0au, 0x11a68894u, 0x0ec32c75u, 0xb9e1af76u, 0x45d20f13u}, + {0x54f062f0u, 0xcf7989d8u, 0x443e496bu, 0x17d83e81u, 0xa2be8639u}, + {0x267af43cu, 0x14dfd913u, 0x2dbb25b6u, 0x227b1a06u, 0x24402bc6u}, + {0xad66cdfeu, 0x7cfa6a50u, 0x8fca746au, 0x7b18f04bu, 0x28eeb28fu}, + {0x37abb017u, 0x7735fb03u, 0x557fb7cdu, 0x520ea993u, 0x69e4a18du}, + {0x53140fddu, 0x0dfb37a9u, 0x88772b05u, 0x20be07e3u, 0x128c07bdu}, + {0x1e72e926u, 0x829ca1d2u, 0x084c2bd7u, 0xcea065e7u, 0xd2b401bfu}, + {0x93d21898u, 0x97c7960eu, 0xb2899f9du, 0xd528a53du, 0x04f33fcau}, + {0x06e1a24fu, 0x4a7295afu, 0x5534bfe6u, 0x452ec8f1u, 0x79685920u}}}; return jump_subsequence; } diff --git a/src/celeritas/random/XorwowRngParams.hh b/src/celeritas/random/XorwowRngParams.hh index 9c20e267d9..57d76b2d2c 100644 --- a/src/celeritas/random/XorwowRngParams.hh +++ b/src/celeritas/random/XorwowRngParams.hh @@ -44,7 +44,7 @@ class XorwowRngParams final : public ParamsDataInterface //// TYPES //// using JumpPoly = Array; - using ArrayJumpPoly = Array; + using ArrayJumpPoly = Array; //// HELPER FUNCTIONS //// diff --git a/test/celeritas/random/XorwowRngEngine.test.cc b/test/celeritas/random/XorwowRngEngine.test.cc index 9969b9b882..27a0a68aa1 100644 --- a/test/celeritas/random/XorwowRngEngine.test.cc +++ b/test/celeritas/random/XorwowRngEngine.test.cc @@ -205,6 +205,62 @@ TEST_F(XorwowRngEngineTest, moments) tally.check(num_samples * num_seeds, 1e-3); } +TEST_F(XorwowRngEngineTest, jump) +{ + unsigned int size = 2; + + HostStore states(params->host_ref(), StreamId{0}, size); + XorwowRngEngine rng(params->host_ref(), states.ref(), TrackSlotId{0}); + XorwowRngEngine skip_rng(params->host_ref(), states.ref(), TrackSlotId{1}); + + XorwowRngInitializer init; + init.seed = 12345; + init.subsequence = 0; + init.offset = 0; + rng = init; + + for (ull_int offset = 0; offset <= (1 << 16); offset++) + { + // Initialize and skip ahead \c offset steps, equivalent to calling + // next() \c offset times + init.offset = offset; + skip_rng = init; + ASSERT_EQ(rng(), skip_rng()); + } + { + init.subsequence = 0; + init.offset = 0; + rng = init; + + init.subsequence = 256; + skip_rng = init; + + rng.skipahead_subsequence(255); + rng.skipahead_subsequence(1); + EXPECT_EQ(rng(), skip_rng()); + } + { + init.subsequence = 0; + init.offset = 0; + rng = init; + + init.subsequence = (1 << 19); + init.offset = 1024; + skip_rng = init; + + rng.skipahead_subsequence(1 << 18); + rng.skipahead_subsequence(1 << 18); + rng.skipahead(1023); + rng.skipahead(1); + EXPECT_EQ(rng(), skip_rng()); + } + { + init.subsequence = -1; + init.offset = -1; + skip_rng = init; + } +} + TEST_F(XorwowRngEngineTest, TEST_IF_CELER_DEVICE(device)) { // Create and initialize states From ad4128e8ab72ccd7ec3d6a9b44f6eacc3a665c20 Mon Sep 17 00:00:00 2001 From: Amanda Lund Date: Wed, 6 Dec 2023 00:15:50 +0000 Subject: [PATCH 5/6] Address review feedback --- src/celeritas/random/CuHipRngEngine.hh | 4 +- src/celeritas/random/XorwowRngData.hh | 10 ++++- src/celeritas/random/XorwowRngEngine.hh | 44 +++++++++---------- test/celeritas/random/XorwowRngEngine.test.cc | 40 ++++++++--------- 4 files changed, 49 insertions(+), 49 deletions(-) diff --git a/src/celeritas/random/CuHipRngEngine.hh b/src/celeritas/random/CuHipRngEngine.hh index eea31ddf16..b1a85262b3 100644 --- a/src/celeritas/random/CuHipRngEngine.hh +++ b/src/celeritas/random/CuHipRngEngine.hh @@ -48,7 +48,6 @@ class CuHipRngEngine inline CELER_FUNCTION result_type operator()(); private: - ParamsRef const& params_; CuHipRngThreadState* state_; template @@ -100,10 +99,9 @@ class GenerateCanonical * Construct from state. */ CELER_FUNCTION -CuHipRngEngine::CuHipRngEngine(ParamsRef const& params, +CuHipRngEngine::CuHipRngEngine(ParamsRef const&, StateRef const& state, TrackSlotId tid) - : params_(params) { CELER_EXPECT(tid < state.rng.size()); state_ = &state.rng[tid]; diff --git a/src/celeritas/random/XorwowRngData.hh b/src/celeritas/random/XorwowRngData.hh index 337cccb5ad..bedff0e2c5 100644 --- a/src/celeritas/random/XorwowRngData.hh +++ b/src/celeritas/random/XorwowRngData.hh @@ -40,8 +40,14 @@ struct XorwowRngParamsData //// METHODS //// - static CELER_CONSTEXPR_FUNCTION size_type num_words() { return 5; } - static CELER_CONSTEXPR_FUNCTION size_type num_bits() { return 32; } + static CELER_CONSTEXPR_FUNCTION size_type num_words() + { + return JumpPoly{}.size(); + } + static CELER_CONSTEXPR_FUNCTION size_type num_bits() + { + return 8 * sizeof(uint_t); + } //! Whether the data is assigned explicit CELER_FUNCTION operator bool() const { return true; } diff --git a/src/celeritas/random/XorwowRngEngine.hh b/src/celeritas/random/XorwowRngEngine.hh index 3e6266eeca..e3bdbe16c7 100644 --- a/src/celeritas/random/XorwowRngEngine.hh +++ b/src/celeritas/random/XorwowRngEngine.hh @@ -78,11 +78,8 @@ class XorwowRngEngine // Generate a 32-bit pseudorandom number inline CELER_FUNCTION result_type operator()(); - // Jump ahead \c num_skip steps - inline CELER_FUNCTION void skipahead(ull_int num_skip); - - // Jump ahead \c num_skip subsequences (\c num_skip * 2^67 steps) - inline CELER_FUNCTION void skipahead_subsequence(ull_int num_skip); + // Advance the state \c count times + inline CELER_FUNCTION void discard(ull_int count); private: /// TYPES /// @@ -97,6 +94,7 @@ class XorwowRngEngine //// HELPER FUNCTIONS //// + inline CELER_FUNCTION void discard_subsequence(ull_int); inline CELER_FUNCTION void next(); inline CELER_FUNCTION void jump(ull_int, ArrayJumpPoly const&); inline CELER_FUNCTION void jump(JumpPoly const&); @@ -156,7 +154,7 @@ XorwowRngEngine::XorwowRngEngine(ParamsRef const& params, * * It is recommended to initialize the state using a very different generator * from the one being initialized to avoid correlations. Here the 64-bit - * SplitMis64 generator is used for initialization (See Matsumoto, Wada, + * SplitMix64 generator is used for initialization (See Matsumoto, Wada, * Kuramoto, and Ashihara, "Common defects in initialization of pseudorandom * number generators". https://dl.acm.org/doi/10.1145/1276927.1276928.) */ @@ -178,8 +176,8 @@ XorwowRngEngine::operator=(Initializer_t const& init) state_->weylstate = static_cast(seed >> 32); // Skip ahead - this->skipahead_subsequence(init.subsequence); - this->skipahead(init.offset); + this->discard_subsequence(init.subsequence); + this->discard(init.offset); return *this; } @@ -197,24 +195,24 @@ CELER_FUNCTION auto XorwowRngEngine::operator()() -> result_type //---------------------------------------------------------------------------// /*! - * Advance the state \c n steps. + * Advance the state \c count times. */ -CELER_FUNCTION void XorwowRngEngine::skipahead(ull_int num_skip) +CELER_FUNCTION void XorwowRngEngine::discard(ull_int count) { - this->jump(num_skip, params_.jump); - state_->weylstate += static_cast(num_skip) * 362437u; + this->jump(count, params_.jump); + state_->weylstate += static_cast(count) * 362437u; } //---------------------------------------------------------------------------// /*! - * Advance the state \c num_skip subsequences (\c num_skip * 2^67 steps). + * Advance the state \c count subsequences (\c count * 2^67 times). * * Note that the Weyl sequence value remains the same since it has period 2^32 * which divides evenly into 2^67. */ -CELER_FUNCTION void XorwowRngEngine::skipahead_subsequence(ull_int num_skip) +CELER_FUNCTION void XorwowRngEngine::discard_subsequence(ull_int count) { - this->jump(num_skip, params_.jump_subsequence); + this->jump(count, params_.jump_subsequence); } //---------------------------------------------------------------------------// @@ -237,13 +235,13 @@ CELER_FUNCTION void XorwowRngEngine::next() //---------------------------------------------------------------------------// /*! - * Jump ahead \c n steps or subsequences. + * Jump ahead \c count steps or subsequences. * * This applies the jump polynomials until the given number of steps or * subsequences have been skipped. */ CELER_FUNCTION void -XorwowRngEngine::jump(ull_int n, ArrayJumpPoly const& jump_poly_arr) +XorwowRngEngine::jump(ull_int count, ArrayJumpPoly const& jump_poly_arr) { // Maximum number of times to apply any jump polynomial. Since the jump // sizes are 4^i for i = [0, 32), the max is 3. @@ -251,17 +249,17 @@ XorwowRngEngine::jump(ull_int n, ArrayJumpPoly const& jump_poly_arr) // Start with the smallest jump (either one step or one subsequence) size_type jump_idx = 0; - while (n > 0) + while (count > 0) { // Number of times to apply this jump polynomial - uint_t num_jump = static_cast(n) & max_num_jump; + uint_t num_jump = static_cast(count) & max_num_jump; for (size_type i = 0; i < num_jump; ++i) { CELER_ASSERT(jump_idx < jump_poly_arr.size()); this->jump(jump_poly_arr[jump_idx]); } ++jump_idx; - n >>= 2; + count >>= 2; } } @@ -321,9 +319,9 @@ CELER_FUNCTION void XorwowRngEngine::jump(JumpPoly const& jump_poly) */ CELER_FUNCTION uint64_t XorwowRngEngine::SplitMix64::operator()() { - uint64_t z = (state += 0x9e3779b97f4a7c15); - z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; - z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + uint64_t z = (state += 0x9e3779b97f4a7c15ull); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ull; + z = (z ^ (z >> 27)) * 0x94d049bb133111ebull; return z ^ (z >> 31); } diff --git a/test/celeritas/random/XorwowRngEngine.test.cc b/test/celeritas/random/XorwowRngEngine.test.cc index 27a0a68aa1..2f22e6cd4f 100644 --- a/test/celeritas/random/XorwowRngEngine.test.cc +++ b/test/celeritas/random/XorwowRngEngine.test.cc @@ -227,38 +227,36 @@ TEST_F(XorwowRngEngineTest, jump) skip_rng = init; ASSERT_EQ(rng(), skip_rng()); } + for (ull_int count : {4, 21, 170, 65535}) { - init.subsequence = 0; - init.offset = 0; - rng = init; - - init.subsequence = 256; - skip_rng = init; - - rng.skipahead_subsequence(255); - rng.skipahead_subsequence(1); + // Skip ahead without initializing + skip_rng.discard(count); + for (ull_int i = 0; i < count; ++i) + { + rng(); + } EXPECT_EQ(rng(), skip_rng()); } { - init.subsequence = 0; + init.subsequence = (1 << 19); init.offset = 0; rng = init; - init.subsequence = (1 << 19); - init.offset = 1024; + init.subsequence += 1; + init.offset = 1023; skip_rng = init; - rng.skipahead_subsequence(1 << 18); - rng.skipahead_subsequence(1 << 18); - rng.skipahead(1023); - rng.skipahead(1); + // Skip 2**67 times to get to the next subsequence + for (size_type i = 0; i < 8; ++i) + { + rng.discard(-1ull); + rng.discard(1); + } + // Skip to the right offset + rng.discard(init.offset); + EXPECT_EQ(rng(), skip_rng()); } - { - init.subsequence = -1; - init.offset = -1; - skip_rng = init; - } } TEST_F(XorwowRngEngineTest, TEST_IF_CELER_DEVICE(device)) From 722d6ce36fbad2636a535d668494bb7a3b985737 Mon Sep 17 00:00:00 2001 From: Amanda Lund Date: Wed, 6 Dec 2023 17:26:37 +0000 Subject: [PATCH 6/6] Be explicit with max ull --- test/celeritas/random/XorwowRngEngine.test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/celeritas/random/XorwowRngEngine.test.cc b/test/celeritas/random/XorwowRngEngine.test.cc index 2f22e6cd4f..788c616904 100644 --- a/test/celeritas/random/XorwowRngEngine.test.cc +++ b/test/celeritas/random/XorwowRngEngine.test.cc @@ -249,7 +249,7 @@ TEST_F(XorwowRngEngineTest, jump) // Skip 2**67 times to get to the next subsequence for (size_type i = 0; i < 8; ++i) { - rng.discard(-1ull); + rng.discard(numeric_limits::max()); rng.discard(1); } // Skip to the right offset