update atomics.h to be more fully C11/C++11 compliant
Fixes #42098
vtjnash committed Sep 9, 2021
1 parent bf5ac8c commit 3e7f980
Showing 22 changed files with 425 additions and 245 deletions.
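At a high level, the atomics.h rewrite below replaces Julia's GCC-specific __atomic_* builtins with the standard C11 <stdatomic.h> / C++11 <atomic> interfaces, so a single header serves both languages. A minimal sketch of that shared-header pattern follows; the struct and field names are illustrative and not taken from the diff:

/* Sketch only: one header usable from C11 and C++11 translation units,
   using the same _Atomic(T) -> std::atomic<T> mapping as the diff below. */
#ifdef __cplusplus
#  include <atomic>
#  define _Atomic(T) std::atomic<T>
extern "C" {
#else
#  include <stdatomic.h>
#endif

typedef struct {
    _Atomic(int) refcount;   /* intended to have the same layout under both compilers */
} example_obj_t;             /* illustrative name, not from the commit */

#ifdef __cplusplus
}
#endif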
220 changes: 183 additions & 37 deletions src/atomics.h
@@ -3,22 +3,44 @@
#ifndef JL_ATOMICS_H
#define JL_ATOMICS_H

// Low-level atomic operations

#if defined(__i386__) && defined(__GNUC__) && !defined(__SSE2__)
# error Julia can only be built for architectures above Pentium 4. Pass -march=pentium4, or set MARCH=pentium4 and ensure that -march is not passed separately with an older architecture.
#endif
#ifdef _COMPILER_MICROSOFT_
# include <intrin.h>
# include <type_traits>

// Low-level atomic operations
#ifdef __cplusplus
#include <atomic>
using std::memory_order_relaxed;
using std::memory_order_consume;
using std::memory_order_acquire;
using std::memory_order_release;
using std::memory_order_acq_rel;
using std::memory_order_seq_cst;
using std::atomic_thread_fence;
using std::atomic_signal_fence;
using std::atomic_load;
using std::atomic_load_explicit;
using std::atomic_store;
using std::atomic_store_explicit;
using std::atomic_fetch_add;
using std::atomic_fetch_add_explicit;
using std::atomic_fetch_and;
using std::atomic_fetch_and_explicit;
using std::atomic_fetch_or;
using std::atomic_fetch_or_explicit;
using std::atomic_compare_exchange_strong;
using std::atomic_compare_exchange_strong_explicit;
using std::atomic_exchange;
using std::atomic_exchange_explicit;
extern "C" {
#define _Atomic(T) std::atomic<T>
#else
#include <stdatomic.h>
#endif

#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
# include <immintrin.h>
#endif
#ifndef _OS_WINDOWS_
# include <pthread.h>
#endif
#include <signal.h>

enum jl_memory_order {
jl_memory_order_unspecified = -2,
@@ -50,73 +72,161 @@ enum jl_memory_order {
* are). We also need to access these atomic variables from the LLVM JIT code
* which is very hard unless the layout of the object is fully specified.
*/
#define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
#define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
#define jl_fence() atomic_thread_fence(memory_order_seq_cst)
#define jl_fence_release() atomic_thread_fence(memory_order_release)
#define jl_signal_fence() atomic_signal_fence(memory_order_seq_cst)

#ifdef __cplusplus
}
// implicit conversion wasn't correctly specified until 2017, so many compilers
// get this wrong; thus we include the correct definitions here (with implicit
// conversion), instead of using the macro version
template<class T>
T jl_atomic_load(std::atomic<T> *ptr)
{
return std::atomic_load<T>(ptr);
}
template<class T>
T jl_atomic_load_explicit(std::atomic<T> *ptr, std::memory_order order)
{
return std::atomic_load_explicit<T>(ptr, order);
}
#define jl_atomic_load_relaxed(ptr) jl_atomic_load_explicit(ptr, memory_order_relaxed)
#define jl_atomic_load_acquire(ptr) jl_atomic_load_explicit(ptr, memory_order_acquire)
template<class T, class S>
void jl_atomic_store(std::atomic<T> *ptr, S desired)
{
std::atomic_store<T>(ptr, desired);
}
template<class T, class S>
void jl_atomic_store_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
{
std::atomic_store_explicit<T>(ptr, desired, order);
}
#define jl_atomic_store_relaxed(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_relaxed)
#define jl_atomic_store_release(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_release)
template<class T, class S>
T jl_atomic_fetch_add(std::atomic<T> *ptr, S val)
{
return std::atomic_fetch_add<T>(ptr, val);
}
template<class T, class S>
T jl_atomic_fetch_add_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
{
return std::atomic_fetch_add_explicit<T>(ptr, val, order);
}
#define jl_atomic_fetch_add_relaxed(ptr, val) jl_atomic_fetch_add_explicit(ptr, val, memory_order_relaxed)
template<class T, class S>
T jl_atomic_fetch_and(std::atomic<T> *ptr, S val)
{
return std::atomic_fetch_and<T>(ptr, val);
}
template<class T, class S>
T jl_atomic_fetch_and_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
{
return std::atomic_fetch_and_explicit<T>(ptr, val, order);
}
#define jl_atomic_fetch_and_relaxed(ptr, val) jl_atomic_fetch_and_explicit(ptr, val, memory_order_relaxed)
template<class T, class S>
T jl_atomic_fetch_or(std::atomic<T> *ptr, S val)
{
return std::atomic_fetch_or<T>(ptr, val);
}
template<class T, class S>
T jl_atomic_fetch_or_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
{
return std::atomic_fetch_or_explicit<T>(ptr, val, order);
}
#define jl_atomic_fetch_or_relaxed(ptr, val) jl_atomic_fetch_or_explicit(ptr, val, memory_order_relaxed)
template<class T, class S>
bool jl_atomic_cmpswap(std::atomic<T> *ptr, T *expected, S val)
{
return std::atomic_compare_exchange_strong<T>(ptr, expected, val);
}
template<class T, class S>
bool jl_atomic_cmpswap_explicit(std::atomic<T> *ptr, T *expected, S val, std::memory_order order)
{
return std::atomic_compare_exchange_strong_explicit<T>(ptr, expected, val, order, order);
}
#define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed)
template<class T, class S>
T jl_atomic_exchange(std::atomic<T> *ptr, S desired)
{
return std::atomic_exchange<T>(ptr, desired);
}
template<class T, class S>
T jl_atomic_exchange_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
{
return std::atomic_exchange_explicit<T>(ptr, desired, order);
}
#define jl_atomic_exchange_relaxed(ptr, val) jl_atomic_exchange_explicit(ptr, val, memory_order_relaxed)
extern "C" {
#else

# define jl_atomic_fetch_add_relaxed(obj, arg) \
__atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
atomic_fetch_add_explicit(obj, arg, memory_order_relaxed)
# define jl_atomic_fetch_add(obj, arg) \
__atomic_fetch_add(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_add_fetch(obj, arg) \
__atomic_add_fetch(obj, arg, __ATOMIC_SEQ_CST)
atomic_fetch_add(obj, arg)
# define jl_atomic_fetch_and_relaxed(obj, arg) \
__atomic_fetch_and(obj, arg, __ATOMIC_RELAXED)
atomic_fetch_and_explicit(obj, arg, memory_order_relaxed)
# define jl_atomic_fetch_and(obj, arg) \
__atomic_fetch_and(obj, arg, __ATOMIC_SEQ_CST)
atomic_fetch_and(obj, arg)
# define jl_atomic_fetch_or_relaxed(obj, arg) \
__atomic_fetch_or(obj, arg, __ATOMIC_RELAXED)
atomic_fetch_or_explicit(obj, arg, memory_order_relaxed)
# define jl_atomic_fetch_or(obj, arg) \
__atomic_fetch_or(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_cmpswap(obj, expected, desired) \
__atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
# define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
__atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)
atomic_fetch_or(obj, arg)
# define jl_atomic_cmpswap(obj, expected, desired) \
atomic_compare_exchange_strong(obj, expected, desired)
# define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed)
// TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
# define jl_atomic_exchange(obj, desired) \
__atomic_exchange_n(obj, desired, __ATOMIC_SEQ_CST)
# define jl_atomic_exchange(obj, desired) \
atomic_exchange(obj, desired)
# define jl_atomic_exchange_relaxed(obj, desired) \
__atomic_exchange_n(obj, desired, __ATOMIC_RELAXED)
atomic_exchange_explicit(obj, desired, memory_order_relaxed)
# define jl_atomic_store(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
atomic_store(obj, val)
# define jl_atomic_store_relaxed(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_RELAXED)
atomic_store_explicit(obj, val, memory_order_relaxed)

# if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
!(defined(_CPU_X86_) || defined(_CPU_X86_64_))
// ICC and Clang don't have this bug...
# define jl_atomic_store_release(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_RELEASE)
atomic_store_explicit(obj, val, memory_order_release)
# else
// Workaround a GCC bug when using store with release order by using the
// stronger version instead.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67458
// fixed in https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=d8c40eff56f69877b33c697ded756d50fde90c27
# define jl_atomic_store_release(obj, val) do { \
jl_signal_fence(); \
__atomic_store_n(obj, val, __ATOMIC_RELEASE); \
atomic_store_explicit(obj, val, memory_order_release); \
} while (0)
# endif
# define jl_atomic_load(obj) \
__atomic_load_n(obj, __ATOMIC_SEQ_CST)
atomic_load(obj)
# define jl_atomic_load_acquire(obj) \
__atomic_load_n(obj, __ATOMIC_ACQUIRE)
atomic_load_explicit(obj, memory_order_acquire)
#ifdef _COMPILER_TSAN_ENABLED_
// For the sake of tsan, call these loads consume ordering since they will act
// as such on the processors we support, while normally the compiler would
// upgrade this to acquire ordering, which is stronger (and slower) than we want.
# define jl_atomic_load_relaxed(obj) \
__atomic_load_n(obj, __ATOMIC_CONSUME)
atomic_load_explicit(obj, memory_order_consume)
#else
# define jl_atomic_load_relaxed(obj) \
__atomic_load_n(obj, __ATOMIC_RELAXED)
atomic_load_explicit(obj, memory_order_relaxed)
#endif
#endif

#ifdef __clang_analyzer__
// for the purposes of the analyzer, we can turn these into non-atomic expressions with similar properties
// (for the sake of the analyzer, we don't care if it is an exact match for behavior)

#undef _Atomic
#define _Atomic(T) T

#undef jl_atomic_exchange
#undef jl_atomic_exchange_relaxed
#define jl_atomic_exchange(obj, desired) \
@@ -135,11 +245,12 @@ enum jl_memory_order {
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
__typeof__((expected)) x__analyzer__ = (expected); \
if (temp__analyzer__ == *x__analyzer__) \
int eq__analyzer__ = memcmp(&temp__analyzer__, x__analyzer__, sizeof(temp__analyzer__)) == 0; \
if (eq__analyzer__) \
*p__analyzer__ = (desired); \
else \
*x__analyzer__ = temp__analyzer__; \
temp__analyzer__ == *x__analyzer__; \
eq__analyzer__; \
}))
#define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap

Expand All @@ -157,7 +268,42 @@ enum jl_memory_order {
#define jl_atomic_load_acquire jl_atomic_load
#define jl_atomic_load_relaxed jl_atomic_load

#undef jl_atomic_fetch_add
#undef jl_atomic_fetch_and
#undef jl_atomic_fetch_or
#undef jl_atomic_fetch_add_relaxed
#undef jl_atomic_fetch_and_relaxed
#undef jl_atomic_fetch_or_relaxed
#define jl_atomic_fetch_add(obj, val) \
(__extension__({ \
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
*(p__analyzer__) = temp__analyzer__ + (val); \
temp__analyzer__; \
}))
#define jl_atomic_fetch_and(obj, val) \
(__extension__({ \
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
*(p__analyzer__) = temp__analyzer__ & (val); \
temp__analyzer__; \
}))
#define jl_atomic_fetch_or(obj, val) \
(__extension__({ \
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
*(p__analyzer__) = temp__analyzer__ | (val); \
temp__analyzer__; \
}))
#define jl_atomic_fetch_add_relaxed jl_atomic_fetch_add
#define jl_atomic_fetch_and_relaxed jl_atomic_fetch_and
#define jl_atomic_fetch_or_relaxed jl_atomic_fetch_or

#endif


#ifdef __cplusplus
}
#endif

#endif // JL_ATOMICS_H
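The jl_atomic_cmpswap wrappers above keep the standard compare-exchange semantics: on failure, atomic_compare_exchange_strong writes the current value back into *expected. A small usage sketch against these wrappers, assuming the macros from src/atomics.h are in scope; the counter and function are illustrative, not part of the commit:

/* Usage sketch only; relies on the jl_atomic_* macros defined above. */
static _Atomic(int) counter;   /* illustrative counter */

static int bump_if_nonnegative(void)
{
    int expected = jl_atomic_load_relaxed(&counter);
    while (expected >= 0) {
        /* On failure, jl_atomic_cmpswap refreshes `expected` with the current
           value (atomic_compare_exchange_strong semantics), so the loop retries. */
        if (jl_atomic_cmpswap(&counter, &expected, expected + 1))
            return 1;   /* incremented */
    }
    return 0;           /* gave up: counter went negative */
}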
4 changes: 2 additions & 2 deletions src/cgmemmgr.cpp
@@ -205,7 +205,7 @@ static intptr_t get_anon_hdl(void)
return -1;
}

static size_t map_offset = 0;
static _Atomic(size_t) map_offset{0};
// Multiple of 128MB.
// Hopefully no one will set a ulimit for this to be a problem...
static constexpr size_t map_size_inc_default = 128 * 1024 * 1024;
@@ -239,7 +239,7 @@ static intptr_t init_shared_map()
anon_hdl = get_anon_hdl();
if (anon_hdl == -1)
return -1;
map_offset = 0;
jl_atomic_store_relaxed(&map_offset, 0);
map_size = get_map_size_inc();
int ret = ftruncate(anon_hdl, map_size);
if (ret != 0) {
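The cgmemmgr.cpp change turns map_offset into an _Atomic(size_t) and resets it with jl_atomic_store_relaxed. A hedged sketch of the bump-reservation pattern such a counter typically backs; the function, its name, and the missing bounds handling are illustrative, not the file's actual allocator:

/* Sketch only: reserve a region by bumping an _Atomic(size_t) offset.
   Relaxed ordering suffices when the counter is only a reservation ticket
   and the mapped memory itself is published through other synchronization. */
static _Atomic(size_t) example_offset;   /* stands in for map_offset */

static size_t reserve_region(size_t alloc_size)
{
    /* Returns the old offset; the caller owns [old, old + alloc_size). */
    return jl_atomic_fetch_add_relaxed(&example_offset, alloc_size);
}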
28 changes: 17 additions & 11 deletions src/codegen.cpp
@@ -3674,8 +3674,9 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
else {
jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here
jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
if (ci != jl_nothing && codeinst->invoke != jl_fptr_sparam) { // check if we know we definitely can't handle this specptr
if (codeinst->invoke == jl_fptr_const_return) {
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
if (ci != jl_nothing && invoke != jl_fptr_sparam) { // check if we know we definitely can't handle this specptr
if (invoke == jl_fptr_const_return) {
result = mark_julia_const(codeinst->rettype_const);
handled = true;
}
@@ -3688,9 +3689,11 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
if (ctx.use_cache) {
// optimization: emit the correct name immediately, if we know it
// TODO: use `emitted` map here too to try to consolidate names?
if (codeinst->specptr.fptr) {
if (specsig ? codeinst->isspecsig : codeinst->invoke == jl_fptr_args) {
protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (fptr) {
if (specsig ? codeinst->isspecsig : invoke == jl_fptr_args) {
protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
need_to_emit = false;
}
}
@@ -5059,8 +5062,9 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
ctx.builder.SetInsertPoint(b0);
Function *theFunc;
Value *theFarg;
if (params.cache && codeinst->invoke != NULL) {
StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->invoke, codeinst);
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
if (params.cache && invoke != NULL) {
StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst);
theFunc = cast<Function>(
M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(jl_LLVMContext)).getCallee());
theFarg = literal_pointer_val(ctx, (jl_value_t*)codeinst);
@@ -7820,12 +7824,14 @@ void jl_compile_workqueue(
"invalid world for code-instance");
StringRef preal_decl = "";
bool preal_specsig = false;
if (params.cache && codeinst->invoke != NULL) {
if (codeinst->invoke == jl_fptr_args) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
if (params.cache && invoke != NULL) {
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (invoke == jl_fptr_args) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
}
else if (codeinst->isspecsig) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
preal_specsig = true;
}
}
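The codegen.cpp edits all follow one pattern: read a concurrently updated field such as codeinst->invoke or codeinst->specptr.fptr exactly once with jl_atomic_load_relaxed, keep it in a local, and branch on that local, so the checked value cannot change between the test and the use. A self-contained sketch of that load-once pattern; the struct and callback type are hypothetical:

typedef void (*callback_t)(void);        /* hypothetical function-pointer type */

typedef struct {
    _Atomic(callback_t) invoke;          /* stands in for codeinst->invoke */
} example_entry_t;

static void run_if_set(example_entry_t *e)
{
    /* Load once; reuse the local instead of re-reading a racing field. */
    callback_t invoke = jl_atomic_load_relaxed(&e->invoke);
    if (invoke != NULL)
        invoke();   /* calls the value that was checked, not a fresh re-read */
}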