From 989da1880fb615489c15b467e5cf9eb88e07a4fb Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Fri, 16 Aug 2024 10:24:55 -0300 Subject: [PATCH 1/3] Fix cong implementation to be properly random and not just cycling. --- src/julia_internal.h | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/julia_internal.h b/src/julia_internal.h index d4d1a3239785c..e028309e6bd46 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1306,20 +1306,36 @@ JL_DLLEXPORT size_t jl_maxrss(void); // congruential random number generator // for a small amount of thread-local randomness +//TODO: utilize https://github.com/openssl/openssl/blob/master/crypto/rand/rand_uniform.c#L13-L99 +// for better performance, it does however require making users expect a 32bit random number. + STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT { - if (max == 0) + if (max < 2) return 0; uint64_t mask = ~(uint64_t)0; - --max; - mask >>= __builtin_clzll(max|1); - uint64_t x; + int zeros = __builtin_clzll(max); + int bits = CHAR_BIT * sizeof(uint64_t) - zeros; + mask = mask >> zeros; do { - *seed = 69069 * (*seed) + 362437; - x = *seed & mask; - } while (x > max); - return x; + uint64_t value = 69069 * (*seed) + 362437; + *seed = value; + uint64_t x = value & mask; + if (x < max) { + return x; + } + int bits_left = zeros; + while (bits_left >= bits) { + value >>= bits; + x = value & mask; + if (x < max) { + return x; + } + bits_left -= bits; + } + } while (1); } + JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_init_rand(void); From 89048cf057fd82ec0db5b9b07b00ce61fbf27f6c Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Fri, 16 Aug 2024 13:19:33 -0300 Subject: [PATCH 2/3] Adapt C users of cong so that they correctly understand that it's a open interval and not a closed one --- src/gc-stock.h | 2 +- src/julia_internal.h | 2 +- src/scheduler.c | 2 +- src/signal-handling.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/gc-stock.h b/src/gc-stock.h index 7176ad8b504f4..45c93bf4289ae 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -446,7 +446,7 @@ STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT { assert(jl_n_markthreads > 0); - int v = gc_first_tid + (int)cong(jl_n_markthreads - 1, &ptls->rngseed); + int v = gc_first_tid + (int)cong(jl_n_markthreads, &ptls->rngseed); // cong is [0, n) assert(v >= gc_first_tid && v <= gc_last_parallel_collector_thread_id()); return v; } diff --git a/src/julia_internal.h b/src/julia_internal.h index e028309e6bd46..0948e8c69c330 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -1309,7 +1309,7 @@ JL_DLLEXPORT size_t jl_maxrss(void); //TODO: utilize https://github.com/openssl/openssl/blob/master/crypto/rand/rand_uniform.c#L13-L99 // for better performance, it does however require making users expect a 32bit random number. -STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT +STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT // Open interval [0, max) { if (max < 2) return 0; diff --git a/src/scheduler.c b/src/scheduler.c index 3cf97ba108873..645d33cb0b632 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -87,7 +87,7 @@ extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, // parallel task runtime // --- -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) +JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) { jl_ptls_t ptls = jl_current_task->ptls; return cong(max, &ptls->rngseed); diff --git a/src/signal-handling.c b/src/signal-handling.c index 6835f5fa364c5..80e9a8acca3ef 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -155,7 +155,7 @@ static void jl_shuffle_int_array_inplace(int *carray, int size, uint64_t *seed) // The "modern Fisher–Yates shuffle" - O(n) algorithm // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm for (int i = size; i-- > 1; ) { - size_t j = cong(i, seed); + size_t j = cong(i + 1, seed); // cong is an open interval so we add 1 uint64_t tmp = carray[j]; carray[j] = carray[i]; carray[i] = tmp; From 8a3b7bd609841bfcba4593d9411247171f270060 Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Fri, 16 Aug 2024 14:05:27 -0300 Subject: [PATCH 3/3] Whitespace changes --- src/scheduler.c | 2 +- src/signal-handling.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scheduler.c b/src/scheduler.c index 645d33cb0b632..71943a25f3233 100644 --- a/src/scheduler.c +++ b/src/scheduler.c @@ -87,7 +87,7 @@ extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache, // parallel task runtime // --- -JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) +JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max) // [0, n) { jl_ptls_t ptls = jl_current_task->ptls; return cong(max, &ptls->rngseed); diff --git a/src/signal-handling.c b/src/signal-handling.c index 80e9a8acca3ef..d7f4697a3c4f0 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -155,7 +155,7 @@ static void jl_shuffle_int_array_inplace(int *carray, int size, uint64_t *seed) // The "modern Fisher–Yates shuffle" - O(n) algorithm // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm for (int i = size; i-- > 1; ) { - size_t j = cong(i + 1, seed); // cong is an open interval so we add 1 + size_t j = cong(i + 1, seed); // cong is an open interval so we add 1 uint64_t tmp = carray[j]; carray[j] = carray[i]; carray[i] = tmp;