diff --git a/src/atomics.h b/src/atomics.h
index 0fa5d6c193513..6ee70f08c69ba 100644
--- a/src/atomics.h
+++ b/src/atomics.h
@@ -3,22 +3,45 @@
 #ifndef JL_ATOMICS_H
 #define JL_ATOMICS_H

-// Low-level atomic operations
-
 #if defined(__i386__) && defined(__GNUC__) && !defined(__SSE2__)
 # error Julia can only be built for architectures above Pentium 4. Pass -march=pentium4, or set MARCH=pentium4 and ensure that -march is not passed separately with an older architecture.
 #endif
-#ifdef _COMPILER_MICROSOFT_
-# include <intrin.h>
-# include <type_traits>
+
+// Low-level atomic operations
+#ifdef __cplusplus
+#include <atomic>
+using std::memory_order_relaxed;
+using std::memory_order_consume;
+using std::memory_order_acquire;
+using std::memory_order_release;
+using std::memory_order_acq_rel;
+using std::memory_order_seq_cst;
+using std::atomic_thread_fence;
+using std::atomic_signal_fence;
+using std::atomic_load;
+using std::atomic_load_explicit;
+using std::atomic_store;
+using std::atomic_store_explicit;
+using std::atomic_fetch_add;
+using std::atomic_fetch_add_explicit;
+using std::atomic_fetch_and;
+using std::atomic_fetch_and_explicit;
+using std::atomic_fetch_or;
+using std::atomic_fetch_or_explicit;
+using std::atomic_compare_exchange_strong;
+using std::atomic_compare_exchange_strong_explicit;
+using std::atomic_exchange;
+using std::atomic_exchange_explicit;
+extern "C" {
+#define _Atomic(T) std::atomic<T>
+#else
+#include <stdatomic.h>
 #endif
+#include <signal.h> // for sig_atomic_t
+
 #if defined(_CPU_X86_64_) || defined(_CPU_X86_)
 # include <immintrin.h>
 #endif
-#ifndef _OS_WINDOWS_
-# include <pthread.h>
-#endif
-#include <signal.h>

 enum jl_memory_order {
     jl_memory_order_unspecified = -2,
@@ -50,44 +73,128 @@ enum jl_memory_order {
  * are). We also need to access these atomic variables from the LLVM JIT code
  * which is very hard unless the layout of the object is fully specified.
 */
-#define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
-#define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
-#define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
+#define jl_fence() atomic_thread_fence(memory_order_seq_cst)
+#define jl_fence_release() atomic_thread_fence(memory_order_release)
+#define jl_signal_fence() atomic_signal_fence(memory_order_seq_cst)
+#ifdef __cplusplus
+}
+// implicit conversion wasn't correctly specified in 2017, so many compilers get
+// this wrong; thus we include the correct definitions here (with implicit
+// conversion), instead of using the macro version
+template<class T>
+T jl_atomic_load(std::atomic<T> *ptr)
+{
+    return std::atomic_load(ptr);
+}
+template<class T>
+T jl_atomic_load_explicit(std::atomic<T> *ptr, std::memory_order order)
+{
+    return std::atomic_load_explicit(ptr, order);
+}
+#define jl_atomic_load_relaxed(ptr) jl_atomic_load_explicit(ptr, memory_order_relaxed)
+#define jl_atomic_load_acquire(ptr) jl_atomic_load_explicit(ptr, memory_order_acquire)
+template<class T, class S>
+void jl_atomic_store(std::atomic<T> *ptr, S desired)
+{
+    std::atomic_store(ptr, desired);
+}
+template<class T, class S>
+void jl_atomic_store_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
+{
+    std::atomic_store_explicit(ptr, desired, order);
+}
+#define jl_atomic_store_relaxed(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_relaxed)
+#define jl_atomic_store_release(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_release)
+template<class T, class S>
+T jl_atomic_fetch_add(std::atomic<T> *ptr, S val)
+{
+    return std::atomic_fetch_add(ptr, val);
+}
+template<class T, class S>
+T jl_atomic_fetch_add_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
+{
+    return std::atomic_fetch_add_explicit(ptr, val, order);
+}
+#define jl_atomic_fetch_add_relaxed(ptr, val) jl_atomic_fetch_add_explicit(ptr, val, memory_order_relaxed)
+template<class T, class S>
+T jl_atomic_fetch_and(std::atomic<T> *ptr, S val)
+{
+    return std::atomic_fetch_and(ptr, val);
+}
+template<class T, class S>
+T jl_atomic_fetch_and_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
+{
+    return std::atomic_fetch_and_explicit(ptr, val, order);
+}
+#define jl_atomic_fetch_and_relaxed(ptr, val) jl_atomic_fetch_and_explicit(ptr, val, memory_order_relaxed)
+template<class T, class S>
+T jl_atomic_fetch_or(std::atomic<T> *ptr, S val)
+{
+    return std::atomic_fetch_or(ptr, val);
+}
+template<class T, class S>
+T jl_atomic_fetch_or_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
+{
+    return std::atomic_fetch_or_explicit(ptr, val, order);
+}
+#define jl_atomic_fetch_or_relaxed(ptr, val) jl_atomic_fetch_or_explicit(ptr, val, memory_order_relaxed)
+template<class T, class S>
+bool jl_atomic_cmpswap(std::atomic<T> *ptr, T *expected, S val)
+{
+    return std::atomic_compare_exchange_strong(ptr, expected, val);
+}
+template<class T, class S>
+bool jl_atomic_cmpswap_explicit(std::atomic<T> *ptr, T *expected, S val, std::memory_order order)
+{
+    return std::atomic_compare_exchange_strong_explicit(ptr, expected, val, order, order);
+}
+#define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed)
+template<class T, class S>
+T jl_atomic_exchange(std::atomic<T> *ptr, S desired)
+{
+    return std::atomic_exchange(ptr, desired);
+}
+template<class T, class S>
+T jl_atomic_exchange_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
+{
+    return std::atomic_exchange_explicit(ptr, desired, order);
+}
+#define jl_atomic_exchange_relaxed(ptr, val) jl_atomic_exchange_explicit(ptr, val, memory_order_relaxed)
+extern "C" {
+#else
 # define jl_atomic_fetch_add_relaxed(obj, arg) \
-    __atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
+    atomic_fetch_add_explicit(obj, arg, memory_order_relaxed)
 # define jl_atomic_fetch_add(obj, arg) \
-    __atomic_fetch_add(obj, arg, __ATOMIC_SEQ_CST)
-# define jl_atomic_add_fetch(obj, arg) \
-    __atomic_add_fetch(obj, arg, __ATOMIC_SEQ_CST)
+    atomic_fetch_add(obj, arg)
 # define jl_atomic_fetch_and_relaxed(obj, arg) \
-    __atomic_fetch_and(obj, arg, __ATOMIC_RELAXED)
+    atomic_fetch_and_explicit(obj, arg, memory_order_relaxed)
 # define jl_atomic_fetch_and(obj, arg) \
-    __atomic_fetch_and(obj, arg, __ATOMIC_SEQ_CST)
+    atomic_fetch_and(obj, arg)
 # define jl_atomic_fetch_or_relaxed(obj, arg) \
-    __atomic_fetch_or(obj, arg, __ATOMIC_RELAXED)
+    atomic_fetch_or_explicit(obj, arg, memory_order_relaxed)
 # define jl_atomic_fetch_or(obj, arg) \
-    __atomic_fetch_or(obj, arg, __ATOMIC_SEQ_CST)
-# define jl_atomic_cmpswap(obj, expected, desired) \
-    __atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
-# define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
-    __atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)
+    atomic_fetch_or(obj, arg)
+# define jl_atomic_cmpswap(obj, expected, desired) \
+    atomic_compare_exchange_strong(obj, expected, desired)
+# define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
+    atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed)
 // TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
-# define jl_atomic_exchange(obj, desired) \
-    __atomic_exchange_n(obj, desired, __ATOMIC_SEQ_CST)
+# define jl_atomic_exchange(obj, desired) \
+    atomic_exchange(obj, desired)
 # define jl_atomic_exchange_relaxed(obj, desired) \
-    __atomic_exchange_n(obj, desired, __ATOMIC_RELAXED)
+    atomic_exchange_explicit(obj, desired, memory_order_relaxed)
 # define jl_atomic_store(obj, val) \
-    __atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
+    atomic_store(obj, val)
 # define jl_atomic_store_relaxed(obj, val) \
-    __atomic_store_n(obj, val, __ATOMIC_RELAXED)
+    atomic_store_explicit(obj, val, memory_order_relaxed)
 # if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
     !(defined(_CPU_X86_) || defined(_CPU_X86_64_))
 // ICC and Clang doesn't have this bug...
 # define jl_atomic_store_release(obj, val) \
-    __atomic_store_n(obj, val, __ATOMIC_RELEASE)
+    atomic_store_explicit(obj, val, memory_order_release)
 # else
 // Workaround a GCC bug when using store with release order by using the
 // stronger version instead.
@@ -95,28 +202,32 @@ enum jl_memory_order {
 // fixed in https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=d8c40eff56f69877b33c697ded756d50fde90c27
 # define jl_atomic_store_release(obj, val) do { \
         jl_signal_fence(); \
-        __atomic_store_n(obj, val, __ATOMIC_RELEASE); \
+        atomic_store_explicit(obj, val, memory_order_release); \
     } while (0)
 # endif
 # define jl_atomic_load(obj) \
-    __atomic_load_n(obj, __ATOMIC_SEQ_CST)
+    atomic_load(obj)
 # define jl_atomic_load_acquire(obj) \
-    __atomic_load_n(obj, __ATOMIC_ACQUIRE)
+    atomic_load_explicit(obj, memory_order_acquire)
 #ifdef _COMPILER_TSAN_ENABLED_
 // For the sake of tsan, call these loads consume ordering since they will act
 // as such on the processors we support while normally, the compiler would
 // upgrade this to acquire ordering, which is strong (and slower) than we want.
# define jl_atomic_load_relaxed(obj) \ - __atomic_load_n(obj, __ATOMIC_CONSUME) + atomic_load_explicit(obj, memory_order_consume) #else # define jl_atomic_load_relaxed(obj) \ - __atomic_load_n(obj, __ATOMIC_RELAXED) + atomic_load_explicit(obj, memory_order_relaxed) +#endif #endif #ifdef __clang_analyzer__ // for the purposes of the analyzer, we can turn these into non-atomic expressions with similar properties // (for the sake of the analyzer, we don't care if it is an exact match for behavior) +#undef _Atomic +#define _Atomic(T) T + #undef jl_atomic_exchange #undef jl_atomic_exchange_relaxed #define jl_atomic_exchange(obj, desired) \ @@ -135,11 +246,12 @@ enum jl_memory_order { __typeof__((obj)) p__analyzer__ = (obj); \ __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \ __typeof__((expected)) x__analyzer__ = (expected); \ - if (temp__analyzer__ == *x__analyzer__) \ + int eq__analyzer__ = memcmp(&temp__analyzer__, x__analyzer__, sizeof(temp__analyzer__)) == 0; \ + if (eq__analyzer__) \ *p__analyzer__ = (desired); \ else \ *x__analyzer__ = temp__analyzer__; \ - temp__analyzer__ == *x__analyzer__; \ + eq__analyzer__; \ })) #define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap @@ -157,7 +269,42 @@ enum jl_memory_order { #define jl_atomic_load_acquire jl_atomic_load #define jl_atomic_load_relaxed jl_atomic_load +#undef jl_atomic_fetch_add +#undef jl_atomic_fetch_and +#undef jl_atomic_fetch_or +#undef jl_atomic_fetch_add_relaxed +#undef jl_atomic_fetch_and_relaxed +#undef jl_atomic_fetch_or_relaxed +#define jl_atomic_fetch_add(obj, val) \ + (__extension__({ \ + __typeof__((obj)) p__analyzer__ = (obj); \ + __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \ + *(p__analyzer__) = temp__analyzer__ + (val); \ + temp__analyzer__; \ + })) +#define jl_atomic_fetch_and(obj, val) \ + (__extension__({ \ + __typeof__((obj)) p__analyzer__ = (obj); \ + __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \ + *(p__analyzer__) = temp__analyzer__ & (val); \ + temp__analyzer__; \ + })) +#define jl_atomic_fetch_or(obj, val) \ + (__extension__({ \ + __typeof__((obj)) p__analyzer__ = (obj); \ + __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \ + *(p__analyzer__) = temp__analyzer__ | (val); \ + temp__analyzer__; \ + })) +#define jl_atomic_fetch_add_relaxed jl_atomic_fetch_add +#define jl_atomic_fetch_and_relaxed jl_atomic_fetch_and +#define jl_atomic_fetch_or_relaxed jl_atomic_fetch_or + #endif +#ifdef __cplusplus +} +#endif + #endif // JL_ATOMICS_H diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp index 23d8b7437b823..344f044737f9d 100644 --- a/src/cgmemmgr.cpp +++ b/src/cgmemmgr.cpp @@ -205,7 +205,7 @@ static intptr_t get_anon_hdl(void) return -1; } -static size_t map_offset = 0; +static _Atomic(size_t) map_offset{0}; // Multiple of 128MB. // Hopefully no one will set a ulimit for this to be a problem... 
static constexpr size_t map_size_inc_default = 128 * 1024 * 1024; @@ -239,7 +239,7 @@ static intptr_t init_shared_map() anon_hdl = get_anon_hdl(); if (anon_hdl == -1) return -1; - map_offset = 0; + jl_atomic_store_relaxed(&map_offset, 0); map_size = get_map_size_inc(); int ret = ftruncate(anon_hdl, map_size); if (ret != 0) { diff --git a/src/codegen.cpp b/src/codegen.cpp index 5cc9f66ffaeff..815b6786d2785 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -3674,12 +3674,14 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const else { jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here jl_code_instance_t *codeinst = (jl_code_instance_t*)ci; - if (ci != jl_nothing && codeinst->invoke != jl_fptr_sparam) { // check if we know we definitely can't handle this specptr - if (codeinst->invoke == jl_fptr_const_return) { + if (ci != jl_nothing) { + auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + // check if we know how to handle this specptr + if (invoke == jl_fptr_const_return) { result = mark_julia_const(codeinst->rettype_const); handled = true; } - else { + else if (invoke != jl_fptr_sparam) { bool specsig, needsparams; std::tie(specsig, needsparams) = uses_specsig(mi, codeinst->rettype, ctx.params->prefer_specsig); std::string name; @@ -3688,9 +3690,11 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const if (ctx.use_cache) { // optimization: emit the correct name immediately, if we know it // TODO: use `emitted` map here too to try to consolidate names? - if (codeinst->specptr.fptr) { - if (specsig ? codeinst->isspecsig : codeinst->invoke == jl_fptr_args) { - protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst); + auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); + if (fptr) { + if (specsig ? 
codeinst->isspecsig : invoke == jl_fptr_args) { + protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst); need_to_emit = false; } } @@ -5059,8 +5063,9 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod ctx.builder.SetInsertPoint(b0); Function *theFunc; Value *theFarg; - if (params.cache && codeinst->invoke != NULL) { - StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->invoke, codeinst); + auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + if (params.cache && invoke != NULL) { + StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst); theFunc = cast<Function>( M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(jl_LLVMContext)).getCallee()); theFarg = literal_pointer_val(ctx, (jl_value_t*)codeinst); @@ -7820,12 +7825,14 @@ void jl_compile_workqueue( "invalid world for code-instance"); StringRef preal_decl = ""; bool preal_specsig = false; - if (params.cache && codeinst->invoke != NULL) { - if (codeinst->invoke == jl_fptr_args) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst); + auto invoke = jl_atomic_load_relaxed(&codeinst->invoke); + if (params.cache && invoke != NULL) { + auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr); + if (invoke == jl_fptr_args) { + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst); } else if (codeinst->isspecsig) { - preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst); + preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst); preal_specsig = true; } } diff --git a/src/datatype.c b/src/datatype.c index 8052719c6f55f..95a4eb4c174ce 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -726,23 +726,23 @@ JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt) #if MAX_ATOMIC_SIZE > MAX_POINTERATOMIC_SIZE #error MAX_ATOMIC_SIZE too large #endif +#if MAX_ATOMIC_SIZE >= 16 && !defined(_P64) +#error 12 byte GC pool size alignment unimplemented for 32-bit +#endif #if MAX_POINTERATOMIC_SIZE > 16 #error MAX_POINTERATOMIC_SIZE too large #endif -#if MAX_POINTERATOMIC_SIZE >= 16 -#ifndef _P64 -#error 12 byte GC pool size not implemented for 32-bit -#endif -typedef __uint128_t uint128_t; -typedef uint128_t jl_uatomicmax_t; -#else -typedef uint64_t jl_uatomicmax_t; -#endif - #if BYTE_ORDER != LITTLE_ENDIAN #error using masks for atomics (instead of memcpy like nb == 16) assumes little endian #endif +#if MAX_POINTERATOMIC_SIZE >= 16 +typedef struct _jl_uint128_t { + uint64_t a; + uint64_t b; +} jl_uint128_t; +#endif + static inline uint32_t zext_read32(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT { uint32_t y = *(uint32_t*)x; @@ -768,11 +768,11 @@ static inline uint64_t zext_read64(const jl_value_t *x, size_t nb) JL_NOTSAFEPOI #endif #if MAX_POINTERATOMIC_SIZE >= 16 -static inline uint128_t zext_read128(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT +static inline jl_uint128_t zext_read128(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT { - uint128_t y = 0; + jl_uint128_t y = {0}; if (nb == 16) - y = *(uint128_t*)x; + y = *(jl_uint128_t*)x; else memcpy(&y, x, nb); return y; @@ -813,34 +813,34 @@ JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *data) size_t nb = jl_datatype_size(bt); // some types have special pools to minimize allocations if (nb == 0) return jl_new_struct_uninit(bt); // returns bt->instance - if (bt == jl_bool_type) return (1 &
jl_atomic_load((int8_t*)data)) ? jl_true : jl_false; - if (bt == jl_uint8_type) return jl_box_uint8(jl_atomic_load((uint8_t*)data)); - if (bt == jl_int64_type) return jl_box_int64(jl_atomic_load((int64_t*)data)); - if (bt == jl_int32_type) return jl_box_int32(jl_atomic_load((int32_t*)data)); - if (bt == jl_int8_type) return jl_box_int8(jl_atomic_load((int8_t*)data)); - if (bt == jl_int16_type) return jl_box_int16(jl_atomic_load((int16_t*)data)); - if (bt == jl_uint64_type) return jl_box_uint64(jl_atomic_load((uint64_t*)data)); - if (bt == jl_uint32_type) return jl_box_uint32(jl_atomic_load((uint32_t*)data)); - if (bt == jl_uint16_type) return jl_box_uint16(jl_atomic_load((uint16_t*)data)); - if (bt == jl_char_type) return jl_box_char(jl_atomic_load((uint32_t*)data)); + if (bt == jl_bool_type) return (1 & jl_atomic_load((_Atomic(int8_t)*)data)) ? jl_true : jl_false; + if (bt == jl_uint8_type) return jl_box_uint8(jl_atomic_load((_Atomic(uint8_t)*)data)); + if (bt == jl_int64_type) return jl_box_int64(jl_atomic_load((_Atomic(int64_t)*)data)); + if (bt == jl_int32_type) return jl_box_int32(jl_atomic_load((_Atomic(int32_t)*)data)); + if (bt == jl_int8_type) return jl_box_int8(jl_atomic_load((_Atomic(int8_t)*)data)); + if (bt == jl_int16_type) return jl_box_int16(jl_atomic_load((_Atomic(int16_t)*)data)); + if (bt == jl_uint64_type) return jl_box_uint64(jl_atomic_load((_Atomic(uint64_t)*)data)); + if (bt == jl_uint32_type) return jl_box_uint32(jl_atomic_load((_Atomic(uint32_t)*)data)); + if (bt == jl_uint16_type) return jl_box_uint16(jl_atomic_load((_Atomic(uint16_t)*)data)); + if (bt == jl_char_type) return jl_box_char(jl_atomic_load((_Atomic(uint32_t)*)data)); jl_task_t *ct = jl_current_task; jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt); // data is aligned to the power of two, // we will write too much of v, but the padding should exist if (nb == 1) - *(uint8_t*) v = jl_atomic_load((uint8_t*)data); + *(uint8_t*) v = jl_atomic_load((_Atomic(uint8_t)*)data); else if (nb <= 2) - *(uint16_t*)v = jl_atomic_load((uint16_t*)data); + *(uint16_t*)v = jl_atomic_load((_Atomic(uint16_t)*)data); else if (nb <= 4) - *(uint32_t*)v = jl_atomic_load((uint32_t*)data); + *(uint32_t*)v = jl_atomic_load((_Atomic(uint32_t)*)data); #if MAX_POINTERATOMIC_SIZE >= 8 else if (nb <= 8) - *(uint64_t*)v = jl_atomic_load((uint64_t*)data); + *(uint64_t*)v = jl_atomic_load((_Atomic(uint64_t)*)data); #endif #if MAX_POINTERATOMIC_SIZE >= 16 else if (nb <= 16) - *(uint128_t*)v = jl_atomic_load((uint128_t*)data); + *(jl_uint128_t*)v = jl_atomic_load((_Atomic(jl_uint128_t)*)data); #endif else abort(); @@ -856,18 +856,18 @@ JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb) if (nb == 0) ; else if (nb == 1) - jl_atomic_store((uint8_t*)dst, *(uint8_t*)src); + jl_atomic_store((_Atomic(uint8_t)*)dst, *(uint8_t*)src); else if (nb == 2) - jl_atomic_store((uint16_t*)dst, *(uint16_t*)src); + jl_atomic_store((_Atomic(uint16_t)*)dst, *(uint16_t*)src); else if (nb <= 4) - jl_atomic_store((uint32_t*)dst, zext_read32(src, nb)); + jl_atomic_store((_Atomic(uint32_t)*)dst, zext_read32(src, nb)); #if MAX_POINTERATOMIC_SIZE >= 8 else if (nb <= 8) - jl_atomic_store((uint64_t*)dst, zext_read64(src, nb)); + jl_atomic_store((_Atomic(uint64_t)*)dst, zext_read64(src, nb)); #endif #if MAX_POINTERATOMIC_SIZE >= 16 else if (nb <= 16) - jl_atomic_store((uint128_t*)dst, zext_read128(src, nb)); + jl_atomic_store((_Atomic(jl_uint128_t)*)dst, zext_read128(src, nb)); #endif else abort(); @@ -880,32 +880,32 @@ JL_DLLEXPORT jl_value_t 
*jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl jl_datatype_t *bt = (jl_datatype_t*)dt; // some types have special pools to minimize allocations if (nb == 0) return jl_new_struct_uninit(bt); // returns bt->instance - if (bt == jl_bool_type) return (1 & jl_atomic_exchange((int8_t*)dst, 1 & *(int8_t*)src)) ? jl_true : jl_false; - if (bt == jl_uint8_type) return jl_box_uint8(jl_atomic_exchange((uint8_t*)dst, *(int8_t*)src)); - if (bt == jl_int64_type) return jl_box_int64(jl_atomic_exchange((int64_t*)dst, *(int64_t*)src)); - if (bt == jl_int32_type) return jl_box_int32(jl_atomic_exchange((int32_t*)dst, *(int32_t*)src)); - if (bt == jl_int8_type) return jl_box_int8(jl_atomic_exchange((int8_t*)dst, *(int8_t*)src)); - if (bt == jl_int16_type) return jl_box_int16(jl_atomic_exchange((int16_t*)dst, *(int16_t*)src)); - if (bt == jl_uint64_type) return jl_box_uint64(jl_atomic_exchange((uint64_t*)dst, *(uint64_t*)src)); - if (bt == jl_uint32_type) return jl_box_uint32(jl_atomic_exchange((uint32_t*)dst, *(uint32_t*)src)); - if (bt == jl_uint16_type) return jl_box_uint16(jl_atomic_exchange((uint16_t*)dst, *(uint16_t*)src)); - if (bt == jl_char_type) return jl_box_char(jl_atomic_exchange((uint32_t*)dst, *(uint32_t*)src)); + if (bt == jl_bool_type) return (1 & jl_atomic_exchange((_Atomic(int8_t)*)dst, 1 & *(int8_t*)src)) ? jl_true : jl_false; + if (bt == jl_uint8_type) return jl_box_uint8(jl_atomic_exchange((_Atomic(uint8_t)*)dst, *(int8_t*)src)); + if (bt == jl_int64_type) return jl_box_int64(jl_atomic_exchange((_Atomic(int64_t)*)dst, *(int64_t*)src)); + if (bt == jl_int32_type) return jl_box_int32(jl_atomic_exchange((_Atomic(int32_t)*)dst, *(int32_t*)src)); + if (bt == jl_int8_type) return jl_box_int8(jl_atomic_exchange((_Atomic(int8_t)*)dst, *(int8_t*)src)); + if (bt == jl_int16_type) return jl_box_int16(jl_atomic_exchange((_Atomic(int16_t)*)dst, *(int16_t*)src)); + if (bt == jl_uint64_type) return jl_box_uint64(jl_atomic_exchange((_Atomic(uint64_t)*)dst, *(uint64_t*)src)); + if (bt == jl_uint32_type) return jl_box_uint32(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src)); + if (bt == jl_uint16_type) return jl_box_uint16(jl_atomic_exchange((_Atomic(uint16_t)*)dst, *(uint16_t*)src)); + if (bt == jl_char_type) return jl_box_char(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src)); jl_task_t *ct = jl_current_task; jl_value_t *v = jl_gc_alloc(ct->ptls, jl_datatype_size(bt), bt); if (nb == 1) - *(uint8_t*)v = jl_atomic_exchange((uint8_t*)dst, *(uint8_t*)src); + *(uint8_t*)v = jl_atomic_exchange((_Atomic(uint8_t)*)dst, *(uint8_t*)src); else if (nb == 2) - *(uint16_t*)v = jl_atomic_exchange((uint16_t*)dst, *(uint16_t*)src); + *(uint16_t*)v = jl_atomic_exchange((_Atomic(uint16_t)*)dst, *(uint16_t*)src); else if (nb <= 4) - *(uint32_t*)v = jl_atomic_exchange((uint32_t*)dst, zext_read32(src, nb)); + *(uint32_t*)v = jl_atomic_exchange((_Atomic(uint32_t)*)dst, zext_read32(src, nb)); #if MAX_POINTERATOMIC_SIZE >= 8 else if (nb <= 8) - *(uint64_t*)v = jl_atomic_exchange((uint64_t*)dst, zext_read64(src, nb)); + *(uint64_t*)v = jl_atomic_exchange((_Atomic(uint64_t)*)dst, zext_read64(src, nb)); #endif #if MAX_POINTERATOMIC_SIZE >= 16 else if (nb <= 16) - *(uint128_t*)v = jl_atomic_exchange((uint128_t*)dst, zext_read128(src, nb)); + *(jl_uint128_t*)v = jl_atomic_exchange((_Atomic(jl_uint128_t)*)dst, zext_read128(src, nb)); #endif else abort(); @@ -922,29 +922,29 @@ JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expect } else if (nb == 1) { uint8_t y = 
*(uint8_t*)expected; - success = jl_atomic_cmpswap((uint8_t*)dst, &y, *(uint8_t*)src); + success = jl_atomic_cmpswap((_Atomic(uint8_t)*)dst, &y, *(uint8_t*)src); } else if (nb == 2) { uint16_t y = *(uint16_t*)expected; - success = jl_atomic_cmpswap((uint16_t*)dst, &y, *(uint16_t*)src); + success = jl_atomic_cmpswap((_Atomic(uint16_t)*)dst, &y, *(uint16_t*)src); } else if (nb <= 4) { uint32_t y = zext_read32(expected, nb); uint32_t z = zext_read32(src, nb); - success = jl_atomic_cmpswap((uint32_t*)dst, &y, z); + success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, &y, z); } #if MAX_POINTERATOMIC_SIZE >= 8 else if (nb <= 8) { uint64_t y = zext_read64(expected, nb); uint64_t z = zext_read64(src, nb); - success = jl_atomic_cmpswap((uint64_t*)dst, &y, z); + success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, &y, z); } #endif #if MAX_POINTERATOMIC_SIZE >= 16 else if (nb <= 16) { - uint128_t y = zext_read128(expected, nb); - uint128_t z = zext_read128(src, nb); - success = jl_atomic_cmpswap((uint128_t*)dst, &y, z); + jl_uint128_t y = zext_read128(expected, nb); + jl_uint128_t z = zext_read128(src, nb); + success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, &y, z); } #endif else { @@ -971,10 +971,10 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t if (dt == et) { *y8 = *(uint8_t*)expected; uint8_t z8 = *(uint8_t*)src; - success = jl_atomic_cmpswap((uint8_t*)dst, y8, z8); + success = jl_atomic_cmpswap((_Atomic(uint8_t)*)dst, y8, z8); } else { - *y8 = jl_atomic_load((uint8_t*)dst); + *y8 = jl_atomic_load((_Atomic(uint8_t)*)dst); success = 0; } } @@ -984,10 +984,10 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t if (dt == et) { *y16 = *(uint16_t*)expected; uint16_t z16 = *(uint16_t*)src; - success = jl_atomic_cmpswap((uint16_t*)dst, y16, z16); + success = jl_atomic_cmpswap((_Atomic(uint16_t)*)dst, y16, z16); } else { - *y16 = jl_atomic_load((uint16_t*)dst); + *y16 = jl_atomic_load((_Atomic(uint16_t)*)dst); success = 0; } } @@ -997,13 +997,13 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *y32 = zext_read32(expected, nb); uint32_t z32 = zext_read32(src, nb); while (1) { - success = jl_atomic_cmpswap((uint32_t*)dst, y32, z32); + success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32); if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt)) break; } } else { - *y32 = jl_atomic_load((uint32_t*)dst); + *y32 = jl_atomic_load((_Atomic(uint32_t)*)dst); success = 0; } } @@ -1014,31 +1014,31 @@ JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *y64 = zext_read64(expected, nb); uint64_t z64 = zext_read64(src, nb); while (1) { - success = jl_atomic_cmpswap((uint64_t*)dst, y64, z64); + success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64); if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt)) break; } } else { - *y64 = jl_atomic_load((uint64_t*)dst); + *y64 = jl_atomic_load((_Atomic(uint64_t)*)dst); success = 0; } } #endif #if MAX_POINTERATOMIC_SIZE >= 16 else if (nb <= 16) { - uint128_t *y128 = (uint128_t*)y; + jl_uint128_t *y128 = (jl_uint128_t*)y; if (dt == et) { *y128 = zext_read128(expected, nb); - uint128_t z128 = zext_read128(src, nb); + jl_uint128_t z128 = zext_read128(src, nb); while (1) { - success = jl_atomic_cmpswap((uint128_t*)dst, y128, z128); + success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128); if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt)) break; } } else 
{ - *y128 = jl_atomic_load((uint128_t*)dst); + *y128 = jl_atomic_load((_Atomic(jl_uint128_t)*)dst); success = 0; } } @@ -1393,7 +1393,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i) jl_bounds_error_int(v, i + 1); size_t offs = jl_field_offset(st, i); if (jl_field_isptr(st, i)) { - return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs)); + return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs)); } jl_value_t *ty = jl_field_type_concrete(st, i); int isatomic = jl_field_isatomic(st, i); @@ -1430,7 +1430,7 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_RO assert(i < jl_datatype_nfields(st)); size_t offs = jl_field_offset(st,i); assert(jl_field_isptr(st,i)); - return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs)); + return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs)); } JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i) @@ -1471,7 +1471,7 @@ void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, return; } if (jl_field_isptr(st, i)) { - jl_atomic_store_relaxed((jl_value_t**)((char*)v + offs), rhs); + jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs), rhs); jl_gc_wb(v, rhs); } else { @@ -1521,9 +1521,9 @@ jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_ jl_value_t *r; if (jl_field_isptr(st, i)) { if (isatomic) - r = jl_atomic_exchange((jl_value_t**)((char*)v + offs), rhs); + r = jl_atomic_exchange((_Atomic(jl_value_t*)*)((char*)v + offs), rhs); else - r = jl_atomic_exchange_relaxed((jl_value_t**)((char*)v + offs), rhs); + r = jl_atomic_exchange_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs), rhs); jl_gc_wb(v, rhs); } else { @@ -1593,7 +1593,7 @@ jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_valu if (!jl_isa(y, ty)) jl_type_error("modifyfield!", ty, y); if (jl_field_isptr(st, i)) { - jl_value_t **p = (jl_value_t**)((char*)v + offs); + _Atomic(jl_value_t*) *p = (_Atomic(jl_value_t*)*)((char*)v + offs); if (isatomic ? jl_atomic_cmpswap(p, &r, y) : jl_atomic_cmpswap_relaxed(p, &r, y)) break; } @@ -1672,7 +1672,7 @@ jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_val jl_datatype_t *rettyp = jl_apply_cmpswap_type(ty); JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE) if (jl_field_isptr(st, i)) { - jl_value_t **p = (jl_value_t**)((char*)v + offs); + _Atomic(jl_value_t*) *p = (_Atomic(jl_value_t*)*)((char*)v + offs); int success; while (1) { success = isatomic ? 
jl_atomic_cmpswap(p, &r, rhs) : jl_atomic_cmpswap_relaxed(p, &r, rhs); @@ -1758,7 +1758,7 @@ JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT { jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v); size_t offs = jl_field_offset(st, i); - jl_value_t **fld = (jl_value_t**)((char*)v + offs); + _Atomic(jl_value_t*) *fld = (_Atomic(jl_value_t*)*)((char*)v + offs); if (!jl_field_isptr(st, i)) { jl_datatype_t *ft = (jl_datatype_t*)jl_field_type_concrete(st, i); if (!jl_is_datatype(ft) || ft->layout->first_ptr < 0) diff --git a/src/dump.c b/src/dump.c index 20d3f5689d353..835c88e92bad1 100644 --- a/src/dump.c +++ b/src/dump.c @@ -1650,8 +1650,9 @@ static jl_value_t *jl_deserialize_value_module(jl_serializer_state *s) JL_GC_DIS break; jl_binding_t *b = jl_get_binding_wr(m, asname, 1); b->name = (jl_sym_t*)jl_deserialize_value(s, (jl_value_t**)&b->name); - b->value = jl_deserialize_value(s, &b->value); - if (b->value != NULL) jl_gc_wb(m, b->value); + jl_value_t *bvalue = jl_deserialize_value(s, (jl_value_t**)&b->value); + *(jl_value_t**)&b->value = bvalue; + if (bvalue != NULL) jl_gc_wb(m, bvalue); b->globalref = jl_deserialize_value(s, &b->globalref); if (b->globalref != NULL) jl_gc_wb(m, b->globalref); b->owner = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&b->owner); diff --git a/src/gc-stacks.c b/src/gc-stacks.c index fb43affe53b0d..b7adf254026ca 100644 --- a/src/gc-stacks.c +++ b/src/gc-stacks.c @@ -23,7 +23,7 @@ #define MIN_STACK_MAPPINGS_PER_POOL 5 const size_t jl_guard_size = (4096 * 8); -static uint32_t num_stack_mappings = 0; +static _Atomic(uint32_t) num_stack_mappings = 0; #ifdef _OS_WINDOWS_ #define MAP_FAILED NULL diff --git a/src/gc.c b/src/gc.c index 8fb0e00e8f17b..9f5deec019b53 100644 --- a/src/gc.c +++ b/src/gc.c @@ -132,7 +132,7 @@ static jl_mutex_t gc_cache_lock; // Flag that tells us whether we need to support conservative marking // of objects. -static int support_conservative_marking = 0; +static _Atomic(int) support_conservative_marking = 0; /** * Note about GC synchronization: @@ -166,7 +166,7 @@ static int support_conservative_marking = 0; * finalizers in unmanaged (GC safe) mode. */ -jl_gc_num_t gc_num = {0,0,0,0,0,0,0,0,0,0,0,0,0,0}; +jl_gc_num_t gc_num = {0}; static size_t last_long_collect_interval; pagetable_t memory_map; @@ -298,7 +298,7 @@ static void finalize_object(arraylist_t *list, jl_value_t *o, // This way, the mutation should not conflict with the owning thread, // which only writes to locations later than `len` // and will not resize the buffer without acquiring the lock. - size_t len = need_sync ? jl_atomic_load_acquire(&list->len) : list->len; + size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len; size_t oldlen = len; void **items = list->items; size_t j = 0; @@ -331,7 +331,7 @@ static void finalize_object(arraylist_t *list, jl_value_t *o, // The `memset` (like any other content mutation) has to be done // **before** the `cmpxchg` which publishes the length. 
memset(&items[len], 0, (oldlen - len) * sizeof(void*)); - jl_atomic_cmpswap(&list->len, &oldlen, len); + jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len); } else { list->len = len; @@ -484,7 +484,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT { - assert(ptls->gc_state == 0); + assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); arraylist_t *a = &ptls->finalizers; // This acquire load and the release store at the end are used to // synchronize with `finalize_object` on another thread. Apart from the GC, @@ -493,7 +493,7 @@ static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT // (only one thread since it needs to acquire the finalizer lock). // Similar to `finalize_object`, all content mutation has to be done // between the acquire and the release of the length. - size_t oldlen = jl_atomic_load_acquire(&a->len); + size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len); if (__unlikely(oldlen + 2 > a->max)) { JL_LOCK_NOGC(&finalizers_lock); // `a->len` might have been modified. @@ -507,7 +507,7 @@ static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT void **items = a->items; items[oldlen] = v; items[oldlen + 1] = f; - jl_atomic_store_release(&a->len, oldlen + 2); + jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2); } JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT @@ -738,7 +738,7 @@ STATIC_INLINE int gc_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode, assert((tag & 0x3) == mark_mode); } *bits = mark_mode; - tag = jl_atomic_exchange_relaxed(&o->header, tag); + tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag); verify_val(jl_valueof(o)); return !gc_marked(tag); } @@ -781,7 +781,8 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, jl_assume(page); if (mark_mode == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += page->osize; - jl_atomic_fetch_add_relaxed(&page->nold, 1); + static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), ""); + jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1); } else { ptls->gc_cache.scanned_bytes += page->osize; @@ -790,7 +791,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET; int obj_id = (((char*)o) - page_begin) / page->osize; uint8_t *ages = page->ages + obj_id / 8; - jl_atomic_fetch_and_relaxed(ages, ~(1 << (obj_id % 8))); + jl_atomic_fetch_and_relaxed((_Atomic(uint8_t)*)ages, ~(1 << (obj_id % 8))); } } objprofile_count(jl_typeof(jl_valueof(o)), @@ -877,7 +878,7 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT static inline void maybe_collect(jl_ptls_t ptls) { - if (ptls->gc_num.allocd >= 0 || gc_debug_check_other()) { + if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || gc_debug_check_other()) { jl_gc_collect(JL_GC_AUTO); } else { @@ -956,8 +957,10 @@ JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz) jl_throw(jl_memory_exception); gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t, gc_cblist_notify_external_alloc, (v, allocsz)); - ptls->gc_num.allocd += allocsz; - ptls->gc_num.bigalloc++; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_num.bigalloc, + jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1); #ifdef MEMDEBUG memset(v, 0xee, allocsz); 
#endif @@ -1050,7 +1053,8 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT { jl_ptls_t ptls = jl_current_task->ptls; - ptls->gc_num.allocd += sz; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); } static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT @@ -1074,8 +1078,8 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT for (int i = 0; i < jl_n_threads; i++) { jl_ptls_t ptls = jl_all_tls_states[i]; if (ptls) { - memset(&ptls->gc_num, 0, sizeof(jl_thread_gc_num_t)); - ptls->gc_num.allocd = -(int64_t)gc_num.interval; + memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); + jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); } } } @@ -1198,13 +1202,15 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, // to workaround a llvm bug. // Ref https://llvm.org/bugs/show_bug.cgi?id=27190 jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + pool_offset); - assert(ptls->gc_state == 0); + assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); #ifdef MEMDEBUG return jl_gc_big_alloc(ptls, osize); #endif maybe_collect(ptls); - ptls->gc_num.allocd += osize; - ptls->gc_num.poolalloc++; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize); + jl_atomic_store_relaxed(&ptls->gc_num.poolalloc, + jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1); // first try to use the freelist jl_taggedvalue_t *v = p->freelist; if (v) { @@ -2860,7 +2866,7 @@ static void sweep_finalizer_list(arraylist_t *list) } // collector entry point and control -static volatile uint32_t jl_gc_disable_counter = 1; +static _Atomic(uint32_t) jl_gc_disable_counter = 1; JL_DLLEXPORT int jl_gc_enable(int on) { @@ -3212,15 +3218,16 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) jl_task_t *ct = jl_current_task; jl_ptls_t ptls = ct->ptls; - if (jl_gc_disable_counter) { - size_t localbytes = ptls->gc_num.allocd + gc_num.interval; - ptls->gc_num.allocd = -(int64_t)gc_num.interval; - jl_atomic_add_fetch(&gc_num.deferred_alloc, localbytes); + if (jl_atomic_load_relaxed(&jl_gc_disable_counter)) { + size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); + static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), ""); + jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes); return; } gc_debug_print(); - int8_t old_state = ptls->gc_state; + int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state); jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING); // `jl_safepoint_start_gc()` makes sure only one thread can // run the GC. 
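A note on the counter updates in the hunks above: once the fields of jl_thread_gc_num_t are declared _Atomic, a plain `ptls->gc_num.allocd += sz` would compile to a sequentially consistent atomic read-modify-write (a locked instruction on x86), which is stronger than these per-thread counters need; hence the explicit relaxed load/store pairs introduced here. The following is a minimal standalone C11 sketch of that difference, not part of the patch — counter_t and the bump_* helpers are illustrative names only, not Julia APIs.

// Sketch: contrast a plain `+=` on a C11 _Atomic field (seq_cst RMW) with the
// relaxed load/store pair this diff uses for thread-local GC counters.
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    _Atomic(int64_t) allocd;   // mirrors an _Atomic counter field
} counter_t;

static void bump_seq_cst(counter_t *c, int64_t sz)
{
    c->allocd += sz;           // atomic read-modify-write, seq_cst: stronger than needed
}

static void bump_relaxed(counter_t *c, int64_t sz)
{
    // Owner-thread update (assumption): other threads only read this value,
    // so a relaxed load/store pair suffices and avoids a locked RMW.
    atomic_store_explicit(&c->allocd,
                          atomic_load_explicit(&c->allocd, memory_order_relaxed) + sz,
                          memory_order_relaxed);
}

int main(void)
{
    counter_t c = { .allocd = 0 };
    bump_seq_cst(&c, 16);
    bump_relaxed(&c, 32);
    printf("%lld\n", (long long)atomic_load_explicit(&c.allocd, memory_order_relaxed));
    return 0;
}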
@@ -3244,7 +3251,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) gc_invoke_callbacks(jl_gc_cb_pre_gc_t, gc_cblist_pre_gc, (collection)); - if (!jl_gc_disable_counter) { + if (!jl_atomic_load_relaxed(&jl_gc_disable_counter)) { JL_LOCK_NOGC(&finalizers_lock); if (_jl_gc_collect(ptls, collection)) { // recollect @@ -3329,9 +3336,9 @@ void jl_init_thread_heap(jl_ptls_t ptls) gc_cache->pc_stack_end = gc_cache->pc_stack + init_size; gc_cache->data_stack = (jl_gc_mark_data_t *)malloc_s(init_size * sizeof(jl_gc_mark_data_t)); - memset(&ptls->gc_num, 0, sizeof(jl_thread_gc_num_t)); + memset(&ptls->gc_num, 0, sizeof(ptls->gc_num)); assert(gc_num.interval == default_collect_interval); - ptls->gc_num.allocd = -(int64_t)gc_num.interval; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval); } // System-wide initializations @@ -3376,8 +3383,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - ptls->gc_num.allocd += sz; - ptls->gc_num.malloc++; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz); + jl_atomic_store_relaxed(&ptls->gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); } return malloc(sz); } @@ -3389,8 +3398,10 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz) if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); - ptls->gc_num.allocd += nm*sz; - ptls->gc_num.malloc++; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz); + jl_atomic_store_relaxed(&ptls->gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); } return calloc(nm, sz); } @@ -3402,8 +3413,10 @@ JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz) free(p); if (pgcstack && ct->world_age) { jl_ptls_t ptls = ct->ptls; - ptls->gc_num.freed += sz; - ptls->gc_num.freecall++; + jl_atomic_store_relaxed(&ptls->gc_num.freed, + jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz); + jl_atomic_store_relaxed(&ptls->gc_num.freecall, + jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1); } } @@ -3415,10 +3428,13 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size jl_ptls_t ptls = ct->ptls; maybe_collect(ptls); if (sz < old) - ptls->gc_num.freed += (old - sz); + jl_atomic_store_relaxed(&ptls->gc_num.freed, + jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz)); else - ptls->gc_num.allocd += (sz - old); - ptls->gc_num.realloc++; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old)); + jl_atomic_store_relaxed(&ptls->gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); } return realloc(p, sz); } @@ -3482,8 +3498,10 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT); if (allocsz < sz) // overflow in adding offs, size was "negative" jl_throw(jl_memory_exception); - ptls->gc_num.allocd += allocsz; - ptls->gc_num.malloc++; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz); + jl_atomic_store_relaxed(&ptls->gc_num.malloc, + jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1); int last_errno = errno; #ifdef _OS_WINDOWS_ DWORD last_error = GetLastError(); @@ -3513,10 +3531,13 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds live_bytes += allocsz - oldsz; } else if (allocsz < oldsz) - ptls->gc_num.freed 
+= (oldsz - allocsz); + jl_atomic_store_relaxed(&ptls->gc_num.freed, + jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz)); else - ptls->gc_num.allocd += (allocsz - oldsz); - ptls->gc_num.realloc++; + jl_atomic_store_relaxed(&ptls->gc_num.allocd, + jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz)); + jl_atomic_store_relaxed(&ptls->gc_num.realloc, + jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1); int last_errno = errno; #ifdef _OS_WINDOWS_ @@ -3731,8 +3752,8 @@ JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) } return result; } else { - int result = support_conservative_marking; - support_conservative_marking = 1; + int result = jl_atomic_load(&support_conservative_marking); + jl_atomic_store(&support_conservative_marking, 1); return result; } } diff --git a/src/interpreter.c b/src/interpreter.c index ea93527d88938..e0284f06d38c2 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -93,7 +93,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s) } jl_value_t *bp_owner = (jl_value_t*)modu; jl_binding_t *b = jl_get_binding_for_method_def(modu, fname); - jl_value_t **bp = &b->value; + _Atomic(jl_value_t*) *bp = &b->value; jl_value_t *gf = jl_generic_function_def(b->name, b->owner, bp, bp_owner, b); return gf; } diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 60b1903aaa802..ffddff30ae1a9 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -173,14 +173,14 @@ static jl_callptr_t _jl_compile_codeinst( // once set, don't change invoke-ptr, as that leads to race conditions // with the (not) simultaneous updates to invoke and specptr if (!decls.specFunctionObject.empty()) { - this_code->specptr.fptr = (void*)getAddressForFunction(decls.specFunctionObject); + jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject)); this_code->isspecsig = isspecsig; } jl_atomic_store_release(&this_code->invoke, addr); } else if (this_code->invoke == jl_fptr_const_return && !decls.specFunctionObject.empty()) { // hack to export this pointer value to jl_dump_method_disasm - this_code->specptr.fptr = (void*)getAddressForFunction(decls.specFunctionObject); + jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject)); } if (this_code== codeinst) fptr = addr; @@ -410,10 +410,10 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world, // printing via disassembly jl_code_instance_t *codeinst = jl_generate_fptr(mi, world); if (codeinst) { - uintptr_t fptr = (uintptr_t)codeinst->invoke; + uintptr_t fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke); if (getwrapper) return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary); - uintptr_t specfptr = (uintptr_t)codeinst->specptr.fptr; + uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (fptr == (uintptr_t)&jl_fptr_const_return && specfptr == 0) { // normally we prevent native code from being generated for these functions, // (using sentinel value `1` instead) @@ -423,7 +423,7 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world, uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled); if (measure_compile_time_enabled) compiler_start_time = jl_hrtime(); - specfptr = (uintptr_t)codeinst->specptr.fptr; + specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (specfptr == 0) { jl_code_info_t *src = jl_type_infer(mi, world, 0); JL_GC_PUSH1(&src); @@ -436,12 
+436,12 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world, if (src && (jl_value_t*)src != jl_nothing) src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src); } - fptr = (uintptr_t)codeinst->invoke; - specfptr = (uintptr_t)codeinst->specptr.fptr; + fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke); + specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); if (src && jl_is_code_info(src)) { if (fptr == (uintptr_t)&jl_fptr_const_return && specfptr == 0) { fptr = (uintptr_t)_jl_compile_codeinst(codeinst, src, world); - specfptr = (uintptr_t)codeinst->specptr.fptr; + specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr); } } JL_GC_POP(); @@ -829,13 +829,14 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *cod std::string string_fname; raw_string_ostream stream_fname(string_fname); // try to pick an appropriate name that describes it - if (Addr == (uintptr_t)codeinst->invoke) { + jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke); + if (Addr == (uintptr_t)invoke) { stream_fname << "jsysw_"; } - else if (codeinst->invoke == &jl_fptr_args) { + else if (invoke == &jl_fptr_args) { stream_fname << "jsys1_"; } - else if (codeinst->invoke == &jl_fptr_sparam) { + else if (invoke == &jl_fptr_sparam) { stream_fname << "jsys3_"; } else { diff --git a/src/jl_uv.c b/src/jl_uv.c index 719d3bf9c6010..0ae68d9666ced 100644 --- a/src/jl_uv.c +++ b/src/jl_uv.c @@ -57,7 +57,7 @@ void jl_init_uv(void) JL_MUTEX_INIT(&jl_uv_mutex); // a file-scope initializer can be used instead } -int jl_uv_n_waiters = 0; +_Atomic(int) jl_uv_n_waiters = 0; void JL_UV_LOCK(void) { diff --git a/src/julia.h b/src/julia.h index de82ce182a517..b915ce02721f6 100644 --- a/src/julia.h +++ b/src/julia.h @@ -127,7 +127,7 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT { // Do not call this on a value that is already initialized. jl_taggedvalue_t *tag = jl_astaggedvalue(v); - jl_atomic_store_relaxed(&tag->type, (jl_value_t*)t); + jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)&tag->type, (jl_value_t*)t); } #define jl_typeis(v,t) (jl_typeof(v)==(jl_value_t*)(t)) @@ -135,8 +135,8 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT // The string data is nul-terminated and hangs off the end of the struct. 
typedef struct _jl_sym_t { JL_DATA_TYPE - struct _jl_sym_t *left; - struct _jl_sym_t *right; + _Atomic(struct _jl_sym_t*) left; + _Atomic(struct _jl_sym_t*) right; uintptr_t hash; // precomputed hash value // JL_ATTRIBUTE_ALIGN_PTRSIZE(char name[]); } jl_sym_t; @@ -229,14 +229,6 @@ typedef jl_value_t *(*jl_fptr_sparam_t)(jl_value_t*, jl_value_t**, uint32_t, jl_ JL_DLLEXPORT extern jl_call_t jl_fptr_interpret_call; -JL_EXTENSION typedef union { - void* fptr; - jl_fptr_args_t fptr1; - // 2 constant - jl_fptr_sparam_t fptr3; - // 4 interpreter -} jl_generic_specptr_t; - typedef struct _jl_method_instance_t jl_method_instance_t; typedef struct _jl_line_info_node_t { @@ -385,8 +377,14 @@ typedef struct _jl_code_instance_t { // compilation state cache uint8_t isspecsig; // if specptr is a specialized function signature for specTypes->rettype uint8_t precompile; // if set, this will be added to the output system image - jl_callptr_t invoke; // jlcall entry point - jl_generic_specptr_t specptr; // private data for `jlcall entry point` + _Atomic(jl_callptr_t) invoke; // jlcall entry point + union _jl_generic_specptr_t { + _Atomic(void*) fptr; + _Atomic(jl_fptr_args_t) fptr1; + // 2 constant + _Atomic(jl_fptr_sparam_t) fptr3; + // 4 interpreter + } specptr; // private data for `jlcall entry point } jl_code_instance_t; // all values are callable as Functions @@ -510,10 +508,10 @@ typedef struct { typedef struct { // not first-class jl_sym_t *name; - jl_value_t *value; - jl_value_t *globalref; // cached GlobalRef for this binding - struct _jl_module_t *owner; // for individual imported bindings - uint8_t constp; + _Atomic(jl_value_t*) value; + jl_value_t* globalref; // cached GlobalRef for this binding -- TODO: make _Atomic + struct _jl_module_t* owner; // for individual imported bindings -- TODO: make _Atomic + uint8_t constp:1; uint8_t exportp:1; uint8_t imported:1; uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package @@ -534,7 +532,7 @@ typedef struct _jl_module_t { uint64_t build_id; jl_uuid_t uuid; size_t primary_world; - uint32_t counter; + _Atomic(uint32_t) counter; int32_t nospecialize; // global bit flags: initialization for new methods int8_t optlevel; int8_t compile; @@ -904,7 +902,7 @@ STATIC_INLINE jl_value_t *jl_svecref(void *t JL_PROPAGATES_ROOT, size_t i) JL_NO assert(i < jl_svec_len(t)); // while svec is supposedly immutable, in practice we sometimes publish it first // and set the values lazily - return jl_atomic_load_relaxed(jl_svec_data(t) + i); + return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)jl_svec_data(t) + i); } STATIC_INLINE jl_value_t *jl_svecset( void *t JL_ROOTING_ARGUMENT JL_PROPAGATES_ROOT, @@ -948,7 +946,7 @@ STATIC_INLINE jl_value_t *jl_array_ptr_ref(void *a JL_PROPAGATES_ROOT, size_t i) { assert(((jl_array_t*)a)->flags.ptrarray); assert(i < jl_array_len(a)); - return jl_atomic_load_relaxed(((jl_value_t**)(jl_array_data(a))) + i); + return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i); } STATIC_INLINE jl_value_t *jl_array_ptr_set( void *a JL_ROOTING_ARGUMENT, size_t i, @@ -956,7 +954,7 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set( { assert(((jl_array_t*)a)->flags.ptrarray); assert(i < jl_array_len(a)); - jl_atomic_store_relaxed(((jl_value_t**)(jl_array_data(a))) + i, (jl_value_t*)x); + jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i, (jl_value_t*)x); if (x) { if (((jl_array_t*)a)->flags.how == 3) { a = jl_array_data_owner(a); @@ -1412,7 +1410,7 @@ JL_DLLEXPORT jl_sym_t 
*jl_tagged_gensym(const char *str, size_t len); JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void); JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name, jl_module_t *module, - jl_value_t **bp, jl_value_t *bp_owner, + _Atomic(jl_value_t*) *bp, jl_value_t *bp_owner, jl_binding_t *bnd); JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module); JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo); diff --git a/src/julia_internal.h b/src/julia_internal.h index 35de5b927eeb6..3006ef27930a8 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -105,7 +105,7 @@ void __tsan_switch_to_fiber(void *fiber, unsigned flags); static uv_loop_t *const unused_uv_loop_arg = (uv_loop_t *)0xBAD10; extern jl_mutex_t jl_uv_mutex; -extern int jl_uv_n_waiters; +extern _Atomic(int) jl_uv_n_waiters; void JL_UV_LOCK(void); #define JL_UV_UNLOCK() JL_UNLOCK(&jl_uv_mutex) @@ -155,8 +155,8 @@ static inline uint64_t cycleclock(void) #include "timing.h" // Global *atomic* integers controlling *process-wide* measurement of compilation time. -extern uint8_t jl_measure_compile_time_enabled; -extern uint64_t jl_cumulative_compile_time; +extern _Atomic(uint8_t) jl_measure_compile_time_enabled; +extern _Atomic(uint64_t) jl_cumulative_compile_time; #ifdef _COMPILER_MICROSOFT_ # define jl_return_address() ((uintptr_t)_ReturnAddress()) @@ -184,14 +184,16 @@ STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a) static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT { size_t i; + _Atomic(void*) *srcpa = (_Atomic(void*)*)srcp; + _Atomic(void*) *dstpa = (_Atomic(void*)*)dstp; if (dstp < srcp || dstp > srcp + n) { for (i = 0; i < n; i++) { - jl_atomic_store_relaxed(dstp + i, jl_atomic_load_relaxed(srcp + i)); + jl_atomic_store_relaxed(dstpa + i, jl_atomic_load_relaxed(srcpa + i)); } } else { for (i = 0; i < n; i++) { - jl_atomic_store_relaxed(dstp + n - i - 1, jl_atomic_load_relaxed(srcp + n - i - 1)); + jl_atomic_store_relaxed(dstpa + n - i - 1, jl_atomic_load_relaxed(srcpa + n - i - 1)); } } } @@ -805,7 +807,7 @@ static inline void jl_set_gc_and_wait(void) jl_task_t *ct = jl_current_task; // reading own gc state doesn't need atomic ops since no one else // should store to it. 
- int8_t state = ct->ptls->gc_state; + int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state); jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING); jl_safepoint_wait_gc(); jl_atomic_store_release(&ct->ptls->gc_state, state); @@ -1114,7 +1116,7 @@ extern void *jl_winsock_handle; void *jl_get_library_(const char *f_lib, int throw_err); #define jl_get_library(f_lib) jl_get_library_(f_lib, 1) -JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name, void **hnd); +JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *hnd); JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name); JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline( jl_value_t *fobj, jl_datatype_t *result, htable_t *cache, jl_svec_t *fill, diff --git a/src/julia_threads.h b/src/julia_threads.h index 23fa8d1ea7864..25ca1fb14b2f9 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -97,7 +97,7 @@ typedef pthread_t jl_thread_t; // Recursive spin lock typedef struct { - volatile jl_thread_t owner; + _Atomic(jl_thread_t) owner; uint32_t count; } jl_mutex_t; @@ -108,13 +108,13 @@ typedef struct { } jl_gc_pool_t; typedef struct { - int64_t allocd; - int64_t freed; - uint64_t malloc; - uint64_t realloc; - uint64_t poolalloc; - uint64_t bigalloc; - uint64_t freecall; + _Atomic(int64_t) allocd; + _Atomic(int64_t) freed; + _Atomic(uint64_t) malloc; + _Atomic(uint64_t) realloc; + _Atomic(uint64_t) poolalloc; + _Atomic(uint64_t) bigalloc; + _Atomic(uint64_t) freecall; } jl_thread_gc_num_t; typedef struct { @@ -202,7 +202,7 @@ typedef struct _jl_tls_states_t { #define JL_GC_STATE_SAFE 2 // gc_state = 2 means the thread is running unmanaged code that can be // execute at the same time with the GC. 
- int8_t gc_state; // read from foreign threads + _Atomic(int8_t) gc_state; // read from foreign threads // execution of certain certain impure // statements is prohibited from certain // callbacks (such as generated functions) diff --git a/src/llvm-version.h b/src/llvm-version.h index f59f7826c334d..fd7b47fc9461e 100644 --- a/src/llvm-version.h +++ b/src/llvm-version.h @@ -21,7 +21,9 @@ #define LLVM_ENABLE_STATS 0 #endif +#ifdef __cplusplus #if defined(__GNUC__) && (__GNUC__ >= 9) // Added in GCC 9, this warning is annoying #pragma GCC diagnostic ignored "-Winit-list-lifetime" #endif +#endif diff --git a/src/method.c b/src/method.c index 32011cab1fd82..0cb67e3c92c1a 100644 --- a/src/method.c +++ b/src/method.c @@ -784,7 +784,8 @@ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name // empty generic function def JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name, jl_module_t *module, - jl_value_t **bp, jl_value_t *bp_owner, + _Atomic(jl_value_t*) *bp, + jl_value_t *bp_owner, jl_binding_t *bnd) { jl_value_t *gf = NULL; @@ -792,16 +793,16 @@ JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name, assert(name && bp); if (bnd && bnd->value != NULL && !bnd->constp) jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(bnd->name)); - if (*bp != NULL) { - gf = *bp; + gf = jl_atomic_load_relaxed(bp); + if (gf != NULL) { if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf)) jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name)); } if (bnd) bnd->constp = 1; - if (*bp == NULL) { + if (gf == NULL) { gf = (jl_value_t*)jl_new_generic_function(name, module); - *bp = gf; + jl_atomic_store(bp, gf); // TODO: fix constp assignment data race if (bp_owner) jl_gc_wb(bp_owner, gf); } return gf; diff --git a/src/module.c b/src/module.c index 4120b6cb9225d..9fd6af2cd575b 100644 --- a/src/module.c +++ b/src/module.c @@ -640,7 +640,8 @@ JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var jl_binding_t *bp = jl_get_binding_wr(m, var, 1); if (bp->value == NULL) { uint8_t constp = 0; - if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) { + // if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) { + if (constp = bp->constp, bp->constp = 1, constp == 0) { jl_value_t *old = NULL; if (jl_atomic_cmpswap(&bp->value, &old, val)) { jl_gc_wb_binding(bp, val); diff --git a/src/rtutils.c b/src/rtutils.c index 67d17c39c67ec..99af741993c44 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -222,7 +222,7 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh) // Must have no safepoint eh->prev = ct->eh; eh->gcstack = ct->gcstack; - eh->gc_state = ct->ptls->gc_state; + eh->gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state); eh->locks_len = ct->ptls->locks.len; eh->defer_signal = ct->ptls->defer_signal; eh->world_age = ct->world_age; @@ -250,7 +250,7 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh) // This function should **NOT** have any safepoint before the ones at the // end. 
diff --git a/src/rtutils.c b/src/rtutils.c
index 67d17c39c67ec..99af741993c44 100644
--- a/src/rtutils.c
+++ b/src/rtutils.c
@@ -222,7 +222,7 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
     // Must have no safepoint
     eh->prev = ct->eh;
     eh->gcstack = ct->gcstack;
-    eh->gc_state = ct->ptls->gc_state;
+    eh->gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
     eh->locks_len = ct->ptls->locks.len;
     eh->defer_signal = ct->ptls->defer_signal;
     eh->world_age = ct->world_age;
@@ -250,7 +250,7 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
     // This function should **NOT** have any safepoint before the ones at the
     // end.
     sig_atomic_t old_defer_signal = ct->ptls->defer_signal;
-    int8_t old_gc_state = ct->ptls->gc_state;
+    int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
     ct->eh = eh->prev;
     ct->gcstack = eh->gcstack;
     small_arraylist_t *locks = &ct->ptls->locks;
diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp
index ba265eb67be76..ebbc9668bf6a3 100644
--- a/src/runtime_ccall.cpp
+++ b/src/runtime_ccall.cpp
@@ -54,7 +54,7 @@ void *jl_get_library_(const char *f_lib, int throw_err)
 }
 
 extern "C" JL_DLLEXPORT
-void *jl_load_and_lookup(const char *f_lib, const char *f_name, void **hnd)
+void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *hnd)
 {
     void *handle = jl_atomic_load_acquire(hnd);
     if (!handle)
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index 741bb5448b847..00c110f7ce1c5 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -83,7 +83,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerref(jl_value_t *p, jl_value_t *order)
     jl_value_t *ety = jl_tparam0(jl_typeof(p));
     char *pp = (char*)jl_unbox_long(p);
     if (ety == (jl_value_t*)jl_any_type) {
-        return jl_atomic_load((jl_value_t**)pp);
+        return jl_atomic_load((_Atomic(jl_value_t*)*)pp);
     }
     else {
         if (!is_valid_intrinsic_elptr(ety))
@@ -103,7 +103,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerset(jl_value_t *p, jl_value_t *x, jl_v
     jl_value_t *ety = jl_tparam0(jl_typeof(p));
     char *pp = (char*)jl_unbox_long(p);
     if (ety == (jl_value_t*)jl_any_type) {
-        jl_atomic_store((jl_value_t**)pp, x);
+        jl_atomic_store((_Atomic(jl_value_t*)*)pp, x);
     }
     else {
         if (!is_valid_intrinsic_elptr(ety))
@@ -127,7 +127,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerswap(jl_value_t *p, jl_value_t *x, jl_
     jl_value_t *y;
     char *pp = (char*)jl_unbox_long(p);
     if (ety == (jl_value_t*)jl_any_type) {
-        y = jl_atomic_exchange((jl_value_t**)pp, x);
+        y = jl_atomic_exchange((_Atomic(jl_value_t*)*)pp, x);
     }
     else {
         if (!is_valid_intrinsic_elptr(ety))
@@ -151,7 +151,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, j
     char *pp = (char*)jl_unbox_long(p);
     jl_value_t *expected;
     if (ety == (jl_value_t*)jl_any_type) {
-        expected = jl_atomic_load((jl_value_t**)pp);
+        expected = jl_atomic_load((_Atomic(jl_value_t*)*)pp);
     }
     else {
         if (!is_valid_intrinsic_elptr(ety))
@@ -169,7 +169,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, j
         jl_value_t *y = jl_apply_generic(f, args, 2);
         args[1] = y;
         if (ety == (jl_value_t*)jl_any_type) {
-            if (jl_atomic_cmpswap((jl_value_t**)pp, &expected, y))
+            if (jl_atomic_cmpswap((_Atomic(jl_value_t*)*)pp, &expected, y))
                 break;
         }
         else {
@@ -215,7 +215,7 @@ JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *exp
         result = expected;
         int success;
         while (1) {
-            success = jl_atomic_cmpswap((jl_value_t**)pp, &result, x);
+            success = jl_atomic_cmpswap((_Atomic(jl_value_t*)*)pp, &result, x);
             if (success || !jl_egal(result, expected))
                 break;
         }
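
The pointermodify/pointerreplace loops above rely on a detail of compare-and-swap: when the swap fails, the value actually found in memory is written back into the `expected`/`result` variable, so the next iteration retries against what is really stored. A standalone C11 sketch of that retry shape (the demo_* names are illustrative; jl_atomic_cmpswap is the analogous wrapper over a strong compare-exchange):

#include <stdatomic.h>
#include <stdint.h>

/* Atomically apply old -> old + delta, illustrating the retry loop used above. */
static int64_t demo_modify(_Atomic(int64_t) *slot, int64_t delta)
{
    int64_t expected = atomic_load(slot);
    while (1) {
        int64_t desired = expected + delta;  /* compute replacement from the snapshot */
        /* On failure, `expected` is overwritten with the current contents of *slot,
           so the loop recomputes against the freshly observed value. */
        if (atomic_compare_exchange_strong(slot, &expected, desired))
            return desired;
    }
}
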
diff --git a/src/safepoint.c b/src/safepoint.c
index d54c7c62bec56..d4f0abda72ab8 100644
--- a/src/safepoint.c
+++ b/src/safepoint.c
@@ -115,7 +115,7 @@ int jl_safepoint_start_gc(void)
         return 1;
     }
     // The thread should have set this already
-    assert(jl_current_task->ptls->gc_state == JL_GC_STATE_WAITING);
+    assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) == JL_GC_STATE_WAITING);
     jl_mutex_lock_nogc(&safepoint_lock);
     // In case multiple threads enter the GC at the same time, only allow
     // one of them to actually run the collection. We can't just let the
@@ -157,7 +157,7 @@ void jl_safepoint_end_gc(void)
 void jl_safepoint_wait_gc(void)
 {
     // The thread should have set this is already
-    assert(jl_current_task->ptls->gc_state != 0);
+    assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) != 0);
     // Use normal volatile load in the loop for speed until GC finishes.
     // Then use an acquire load to make sure the GC result is visible on this thread.
     while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) {
diff --git a/src/symbol.c b/src/symbol.c
index fe8e975f8f525..d23ecdb8012b5 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -15,7 +15,7 @@
 extern "C" {
 #endif
 
-static jl_sym_t *symtab = NULL;
+static _Atomic(jl_sym_t*) symtab = NULL;
 
 #define MAX_SYM_LEN ((size_t)INTPTR_MAX - sizeof(jl_taggedvalue_t) - sizeof(jl_sym_t) - 1)
 
@@ -48,9 +48,9 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
     return sym;
 }
 
-static jl_sym_t *symtab_lookup(jl_sym_t **ptree, const char *str, size_t len, jl_sym_t ***slot) JL_NOTSAFEPOINT
+static jl_sym_t *symtab_lookup(_Atomic(jl_sym_t*) *ptree, const char *str, size_t len, _Atomic(jl_sym_t*) **slot) JL_NOTSAFEPOINT
 {
-    jl_sym_t *node = jl_atomic_load_acquire(ptree); // consume
+    jl_sym_t *node = jl_atomic_load_relaxed(ptree); // consume
     uintptr_t h = hash_symbol(str, len);
 
     // Tree nodes sorted by major key of (int(hash)) and minor key of (str).
@@ -68,7 +68,7 @@ static jl_sym_t *symtab_lookup(jl_sym_t **ptree, const char *str, size_t len, jl
             ptree = &node->left;
         else
             ptree = &node->right;
-        node = jl_atomic_load_acquire(ptree); // consume
+        node = jl_atomic_load_relaxed(ptree); // consume
     }
     if (slot != NULL)
         *slot = ptree;
@@ -84,7 +84,7 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT // (or throw)
         jl_exceptionf(jl_argumenterror_type, "Symbol name too long");
 #endif
     assert(!memchr(str, 0, len));
-    jl_sym_t **slot;
+    _Atomic(jl_sym_t*) *slot;
     jl_sym_t *node = symtab_lookup(&symtab, str, len, &slot);
     if (node == NULL) {
         JL_LOCK_NOGC(&gc_perm_lock);
@@ -122,7 +122,7 @@ JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void)
     return symtab;
 }
 
-static uint32_t gs_ctr = 0; // TODO: per-thread
+static _Atomic(uint32_t) gs_ctr = 0; // TODO: per-module?
 uint32_t jl_get_gs_ctr(void) { return gs_ctr; }
 void jl_set_gs_ctr(uint32_t ctr) { gs_ctr = ctr; }
 
diff --git a/src/threading.c b/src/threading.c
index ffe53c07b45ee..5f648f68fd48f 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -287,8 +287,8 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
 #endif
 
 jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
-uint8_t jl_measure_compile_time_enabled = 0;
-uint64_t jl_cumulative_compile_time = 0;
+_Atomic(uint8_t) jl_measure_compile_time_enabled = 0;
+_Atomic(uint64_t) jl_cumulative_compile_time = 0;
 
 // return calling thread's ID
 // Also update the suspended_threads list in signals-mach when changing the