diff --git a/src/gc-stock.h b/src/gc-stock.h index 7176ad8b504f4..45c93bf4289ae 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -446,7 +446,7 @@ STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); - int v = gc_first_tid + (int)cong(jl_n_markthreads - 1, &ptls->rngseed); + int v = gc_first_tid + (int)cong(jl_n_markthreads, &ptls->rngseed); // cong is [0, n) assert(v >= gc_first_tid && v <= gc_last_parallel_collector_thread_id()); return v; } diff --git a/src/julia_internal.h b/src/julia_internal.h index 23a9c90edf8aa..dad28791f8c35 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1307,20 +1307,36 @@ JL_DLLEXPORT size_t jl_maxrss(void); // congruential random number generator // for a small amount of thread-local randomness -STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT +//TODO: utilize https://github.com/openssl/openssl/blob/master/crypto/rand/rand_uniform.c#L13-L99 +// for better performance, it does however require making users expect a 32bit random number. + +STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT // Open interval [0, max) { - if (max == 0) + if (max < 2) return 0; uint64_t mask = ~(uint64_t)0; - --max; - mask >>= __builtin_clzll(max|1); - uint64_t x; + int zeros = __builtin_clzll(max); + int bits = CHAR_BIT * sizeof(uint64_t) - zeros; + mask = mask >> zeros; do { - *seed = 69069 * (*seed) + 362437; - x = *seed & mask; - } while (x > max); - return x; + uint64_t value = 69069 * (*seed) + 362437; + *seed = value; + uint64_t x = value & mask; + if (x < max) { + return x; + } + int bits_left = zeros; + while (bits_left >= bits) { + value >>= bits; + x = value & mask; + if (x < max) { + return x; + } + bits_left -= bits; + } + } while (1); } + JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_init_rand(void); diff --git a/src/scheduler.c b/src/scheduler.c index 3cf97ba108873..71943a25f3233 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -87,7 +87,7 @@ extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, // parallel task runtime // --- -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) +JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) { jl_ptls_t ptls = jl_current_task->ptls; return cong(max, &ptls->rngseed); diff --git a/src/signal-handling.c b/src/signal-handling.c index 6835f5fa364c5..d7f4697a3c4f0 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -155,7 +155,7 @@ static void jl_shuffle_int_array_inplace(int *carray, int size, uint64_t *seed) // The "modern Fisher–Yates shuffle" - O(n) algorithm // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm for (int i = size; i-- > 1; ) { - size_t j = cong(i, seed); + size_t j = cong(i + 1, seed); // cong is an open interval so we add 1 uint64_t tmp = carray[j]; carray[j] = carray[i]; carray[i] = tmp;