update atomics.h to be more fully C11/C++11 compliant
Fixes #42098
vtjnash committed Sep 10, 2021
1 parent c4f0d8b commit 233cc24
Showing 22 changed files with 430 additions and 249 deletions.
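
The core change swaps GCC-specific __atomic_* builtins on plain variables for C11 _Atomic types and <stdatomic.h> operations, which atomics.h aliases to std::atomic when compiled as C++. A minimal before/after sketch of that migration — the counter and the two functions are illustrative, not code from this commit:

#include <stdatomic.h>

static _Atomic(int) counter;    /* was: static int counter; */

int bump(void)
{
    /* was: __atomic_fetch_add(&counter, 1, __ATOMIC_SEQ_CST) */
    return atomic_fetch_add(&counter, 1);
}

int peek(void)
{
    /* was: __atomic_load_n(&counter, __ATOMIC_RELAXED) */
    return atomic_load_explicit(&counter, memory_order_relaxed);
}
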
221 changes: 184 additions & 37 deletions src/atomics.h
@@ -3,22 +3,45 @@
#ifndef JL_ATOMICS_H
#define JL_ATOMICS_H

// Low-level atomic operations

#if defined(__i386__) && defined(__GNUC__) && !defined(__SSE2__)
# error Julia can only be built for architectures above Pentium 4. Pass -march=pentium4, or set MARCH=pentium4 and ensure that -march is not passed separately with an older architecture.
#endif
#ifdef _COMPILER_MICROSOFT_
# include <intrin.h>
# include <type_traits>

// Low-level atomic operations
#ifdef __cplusplus
#include <atomic>
using std::memory_order_relaxed;
using std::memory_order_consume;
using std::memory_order_acquire;
using std::memory_order_release;
using std::memory_order_acq_rel;
using std::memory_order_seq_cst;
using std::atomic_thread_fence;
using std::atomic_signal_fence;
using std::atomic_load;
using std::atomic_load_explicit;
using std::atomic_store;
using std::atomic_store_explicit;
using std::atomic_fetch_add;
using std::atomic_fetch_add_explicit;
using std::atomic_fetch_and;
using std::atomic_fetch_and_explicit;
using std::atomic_fetch_or;
using std::atomic_fetch_or_explicit;
using std::atomic_compare_exchange_strong;
using std::atomic_compare_exchange_strong_explicit;
using std::atomic_exchange;
using std::atomic_exchange_explicit;
extern "C" {
#define _Atomic(T) std::atomic<T>
#else
#include <stdatomic.h>
#endif
#include <signal.h> // for sig_atomic_t

#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
# include <immintrin.h>
#endif
#ifndef _OS_WINDOWS_
# include <pthread.h>
#endif
#include <signal.h>

enum jl_memory_order {
jl_memory_order_unspecified = -2,
@@ -50,73 +73,161 @@ enum jl_memory_order {
* are). We also need to access these atomic variables from the LLVM JIT code
* which is very hard unless the layout of the object is fully specified.
*/
#define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
#define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
#define jl_fence() atomic_thread_fence(memory_order_seq_cst)
#define jl_fence_release() atomic_thread_fence(memory_order_release)
#define jl_signal_fence() atomic_signal_fence(memory_order_seq_cst)

#ifdef __cplusplus
}
// implicit conversion wasn't correctly specified before 2017, so many
// compilers get this wrong; thus we include the correct definitions here
// (with implicit conversion), instead of using the macro version
template<class T>
T jl_atomic_load(std::atomic<T> *ptr)
{
return std::atomic_load<T>(ptr);
}
template<class T>
T jl_atomic_load_explicit(std::atomic<T> *ptr, std::memory_order order)
{
return std::atomic_load_explicit<T>(ptr, order);
}
#define jl_atomic_load_relaxed(ptr) jl_atomic_load_explicit(ptr, memory_order_relaxed)
#define jl_atomic_load_acquire(ptr) jl_atomic_load_explicit(ptr, memory_order_acquire)
template<class T, class S>
void jl_atomic_store(std::atomic<T> *ptr, S desired)
{
std::atomic_store<T>(ptr, desired);
}
template<class T, class S>
void jl_atomic_store_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
{
std::atomic_store_explicit<T>(ptr, desired, order);
}
#define jl_atomic_store_relaxed(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_relaxed)
#define jl_atomic_store_release(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_release)
template<class T, class S>
T jl_atomic_fetch_add(std::atomic<T> *ptr, S val)
{
return std::atomic_fetch_add<T>(ptr, val);
}
template<class T, class S>
T jl_atomic_fetch_add_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
{
return std::atomic_fetch_add_explicit<T>(ptr, val, order);
}
#define jl_atomic_fetch_add_relaxed(ptr, val) jl_atomic_fetch_add_explicit(ptr, val, memory_order_relaxed)
template<class T, class S>
T jl_atomic_fetch_and(std::atomic<T> *ptr, S val)
{
return std::atomic_fetch_and<T>(ptr, val);
}
template<class T, class S>
T jl_atomic_fetch_and_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
{
return std::atomic_fetch_and_explicit<T>(ptr, val, order);
}
#define jl_atomic_fetch_and_relaxed(ptr, val) jl_atomic_fetch_and_explicit(ptr, val, memory_order_relaxed)
template<class T, class S>
T jl_atomic_fetch_or(std::atomic<T> *ptr, S val)
{
return std::atomic_fetch_or<T>(ptr, val);
}
template<class T, class S>
T jl_atomic_fetch_or_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
{
return std::atomic_fetch_or_explicit<T>(ptr, val, order);
}
#define jl_atomic_fetch_or_relaxed(ptr, val) jl_atomic_fetch_or_explicit(ptr, val, memory_order_relaxed)
template<class T, class S>
bool jl_atomic_cmpswap(std::atomic<T> *ptr, T *expected, S val)
{
return std::atomic_compare_exchange_strong<T>(ptr, expected, val);
}
template<class T, class S>
bool jl_atomic_cmpswap_explicit(std::atomic<T> *ptr, T *expected, S val, std::memory_order order)
{
return std::atomic_compare_exchange_strong_explicit<T>(ptr, expected, val, order, order);
}
#define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed)
template<class T, class S>
T jl_atomic_exchange(std::atomic<T> *ptr, S desired)
{
return std::atomic_exchange<T>(ptr, desired);
}
template<class T, class S>
T jl_atomic_exchange_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
{
return std::atomic_exchange_explicit<T>(ptr, desired, order);
}
#define jl_atomic_exchange_relaxed(ptr, val) jl_atomic_exchange_explicit(ptr, val, memory_order_relaxed)
extern "C" {
#else

# define jl_atomic_fetch_add_relaxed(obj, arg) \
__atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
atomic_fetch_add_explicit(obj, arg, memory_order_relaxed)
# define jl_atomic_fetch_add(obj, arg) \
__atomic_fetch_add(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_add_fetch(obj, arg) \
__atomic_add_fetch(obj, arg, __ATOMIC_SEQ_CST)
atomic_fetch_add(obj, arg)
# define jl_atomic_fetch_and_relaxed(obj, arg) \
__atomic_fetch_and(obj, arg, __ATOMIC_RELAXED)
atomic_fetch_and_explicit(obj, arg, memory_order_relaxed)
# define jl_atomic_fetch_and(obj, arg) \
__atomic_fetch_and(obj, arg, __ATOMIC_SEQ_CST)
atomic_fetch_and(obj, arg)
# define jl_atomic_fetch_or_relaxed(obj, arg) \
__atomic_fetch_or(obj, arg, __ATOMIC_RELAXED)
atomic_fetch_or_explicit(obj, arg, memory_order_relaxed)
# define jl_atomic_fetch_or(obj, arg) \
__atomic_fetch_or(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_cmpswap(obj, expected, desired) \
__atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
# define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
__atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)
atomic_fetch_or(obj, arg)
# define jl_atomic_cmpswap(obj, expected, desired) \
atomic_compare_exchange_strong(obj, expected, desired)
# define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed)
// TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
# define jl_atomic_exchange(obj, desired) \
__atomic_exchange_n(obj, desired, __ATOMIC_SEQ_CST)
# define jl_atomic_exchange(obj, desired) \
atomic_exchange(obj, desired)
# define jl_atomic_exchange_relaxed(obj, desired) \
__atomic_exchange_n(obj, desired, __ATOMIC_RELAXED)
atomic_exchange_explicit(obj, desired, memory_order_relaxed)
# define jl_atomic_store(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
atomic_store(obj, val)
# define jl_atomic_store_relaxed(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_RELAXED)
atomic_store_explicit(obj, val, memory_order_relaxed)

# if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
!(defined(_CPU_X86_) || defined(_CPU_X86_64_))
// ICC and Clang don't have this bug...
# define jl_atomic_store_release(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_RELEASE)
atomic_store_explicit(obj, val, memory_order_release)
# else
// Work around a GCC bug when using store with release order by using the
// stronger version instead.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67458
// fixed in https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=d8c40eff56f69877b33c697ded756d50fde90c27
# define jl_atomic_store_release(obj, val) do { \
jl_signal_fence(); \
__atomic_store_n(obj, val, __ATOMIC_RELEASE); \
atomic_store_explicit(obj, val, memory_order_release); \
} while (0)
# endif
# define jl_atomic_load(obj) \
__atomic_load_n(obj, __ATOMIC_SEQ_CST)
atomic_load(obj)
# define jl_atomic_load_acquire(obj) \
__atomic_load_n(obj, __ATOMIC_ACQUIRE)
atomic_load_explicit(obj, memory_order_acquire)
#ifdef _COMPILER_TSAN_ENABLED_
// For the sake of tsan, call these loads consume ordering, since they will act
// as such on the processors we support; normally the compiler would upgrade
// this to acquire ordering, which is stronger (and slower) than we want.
# define jl_atomic_load_relaxed(obj) \
__atomic_load_n(obj, __ATOMIC_CONSUME)
atomic_load_explicit(obj, memory_order_consume)
#else
# define jl_atomic_load_relaxed(obj) \
__atomic_load_n(obj, __ATOMIC_RELAXED)
atomic_load_explicit(obj, memory_order_relaxed)
#endif
#endif

#ifdef __clang_analyzer__
// for the purposes of the analyzer, we can turn these into non-atomic expressions with similar properties
// (for the sake of the analyzer, we don't care if it is an exact match for behavior)

#undef _Atomic
#define _Atomic(T) T

#undef jl_atomic_exchange
#undef jl_atomic_exchange_relaxed
#define jl_atomic_exchange(obj, desired) \
@@ -135,11 +246,12 @@ enum jl_memory_order {
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
__typeof__((expected)) x__analyzer__ = (expected); \
if (temp__analyzer__ == *x__analyzer__) \
int eq__analyzer__ = memcmp(&temp__analyzer__, x__analyzer__, sizeof(temp__analyzer__)) == 0; \
if (eq__analyzer__) \
*p__analyzer__ = (desired); \
else \
*x__analyzer__ = temp__analyzer__; \
temp__analyzer__ == *x__analyzer__; \
eq__analyzer__; \
}))
#define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap

@@ -157,7 +269,42 @@ enum jl_memory_order {
#define jl_atomic_load_acquire jl_atomic_load
#define jl_atomic_load_relaxed jl_atomic_load

#undef jl_atomic_fetch_add
#undef jl_atomic_fetch_and
#undef jl_atomic_fetch_or
#undef jl_atomic_fetch_add_relaxed
#undef jl_atomic_fetch_and_relaxed
#undef jl_atomic_fetch_or_relaxed
#define jl_atomic_fetch_add(obj, val) \
(__extension__({ \
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
*(p__analyzer__) = temp__analyzer__ + (val); \
temp__analyzer__; \
}))
#define jl_atomic_fetch_and(obj, val) \
(__extension__({ \
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
*(p__analyzer__) = temp__analyzer__ & (val); \
temp__analyzer__; \
}))
#define jl_atomic_fetch_or(obj, val) \
(__extension__({ \
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
*(p__analyzer__) = temp__analyzer__ | (val); \
temp__analyzer__; \
}))
#define jl_atomic_fetch_add_relaxed jl_atomic_fetch_add
#define jl_atomic_fetch_and_relaxed jl_atomic_fetch_and
#define jl_atomic_fetch_or_relaxed jl_atomic_fetch_or

#endif


#ifdef __cplusplus
}
#endif

#endif // JL_ATOMICS_H
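
On the C side these names are plain macros over <stdatomic.h>; on the C++ side they resolve to the templates above, so the same call sites compile in both languages. A hypothetical compare-and-swap use of the wrapper API — the state variable and helper below are illustrative, not part of the commit:

#include "atomics.h"   /* assumed include path for the header shown above */

static _Atomic(int) state;   /* std::atomic<int> when compiled as C++ */

/* Try to move `state` from `from` to `to`; returns 1 on success, 0 otherwise. */
static int try_transition(int from, int to)
{
    int expected = from;
    /* on failure, `expected` is overwritten with the value actually observed */
    return jl_atomic_cmpswap(&state, &expected, to);
}
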
4 changes: 2 additions & 2 deletions src/cgmemmgr.cpp
@@ -205,7 +205,7 @@ static intptr_t get_anon_hdl(void)
return -1;
}

static size_t map_offset = 0;
static _Atomic(size_t) map_offset{0};
// Multiple of 128MB.
// Hopefully no one will set a ulimit for this to be a problem...
static constexpr size_t map_size_inc_default = 128 * 1024 * 1024;
@@ -239,7 +239,7 @@ static intptr_t init_shared_map()
anon_hdl = get_anon_hdl();
if (anon_hdl == -1)
return -1;
map_offset = 0;
jl_atomic_store_relaxed(&map_offset, 0);
map_size = get_map_size_inc();
int ret = ftruncate(anon_hdl, map_size);
if (ret != 0) {
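
The codegen.cpp hunks below read racing fields through the new wrappers. As background, the release/acquire pairing those wrappers provide (and that the store-release workaround in atomics.h protects on affected GCC versions) looks like this in a hypothetical publish/consume sketch; the struct and function names are made up, not from the commit:

#include "atomics.h"   /* assumed include path for the wrappers defined above */

typedef struct {
    int payload;
    _Atomic(int) ready;
} box_t;

static void publish(box_t *b, int value)
{
    b->payload = value;                      /* ordinary write ...               */
    jl_atomic_store_release(&b->ready, 1);   /* ... published by a release store */
}

static int try_consume(box_t *b, int *out)
{
    if (!jl_atomic_load_acquire(&b->ready))  /* acquire pairs with the release above */
        return 0;
    *out = b->payload;                       /* safe: happens-after the payload write */
    return 1;
}
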
31 changes: 19 additions & 12 deletions src/codegen.cpp
@@ -3674,12 +3674,14 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
else {
jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here
jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
if (ci != jl_nothing && codeinst->invoke != jl_fptr_sparam) { // check if we know we definitely can't handle this specptr
if (codeinst->invoke == jl_fptr_const_return) {
if (ci != jl_nothing) {
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
// check if we know how to handle this specptr
if (invoke == jl_fptr_const_return) {
result = mark_julia_const(codeinst->rettype_const);
handled = true;
}
else {
else if (invoke != jl_fptr_sparam) {
bool specsig, needsparams;
std::tie(specsig, needsparams) = uses_specsig(mi, codeinst->rettype, ctx.params->prefer_specsig);
std::string name;
@@ -3688,9 +3690,11 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
if (ctx.use_cache) {
// optimization: emit the correct name immediately, if we know it
// TODO: use `emitted` map here too to try to consolidate names?
if (codeinst->specptr.fptr) {
if (specsig ? codeinst->isspecsig : codeinst->invoke == jl_fptr_args) {
protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (fptr) {
if (specsig ? codeinst->isspecsig : invoke == jl_fptr_args) {
protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
need_to_emit = false;
}
}
@@ -5059,8 +5063,9 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
ctx.builder.SetInsertPoint(b0);
Function *theFunc;
Value *theFarg;
if (params.cache && codeinst->invoke != NULL) {
StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->invoke, codeinst);
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
if (params.cache && invoke != NULL) {
StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst);
theFunc = cast<Function>(
M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(jl_LLVMContext)).getCallee());
theFarg = literal_pointer_val(ctx, (jl_value_t*)codeinst);
@@ -7820,12 +7825,14 @@ void jl_compile_workqueue(
"invalid world for code-instance");
StringRef preal_decl = "";
bool preal_specsig = false;
if (params.cache && codeinst->invoke != NULL) {
if (codeinst->invoke == jl_fptr_args) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
if (params.cache && invoke != NULL) {
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (invoke == jl_fptr_args) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
}
else if (codeinst->isspecsig) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
preal_specsig = true;
}
}
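
The recurring pattern in these codegen.cpp hunks is to read a concurrently updated field exactly once, through jl_atomic_load_relaxed, into a local and branch on that local rather than re-reading the field at every comparison. A reduced sketch of that shape — the struct and the sentinel arguments are illustrative, not the real jl_code_instance_t:

#include "atomics.h"   /* assumed include path for jl_atomic_load_relaxed */

typedef void (*fptr_t)(void);

typedef struct {
    _Atomic(fptr_t) invoke;   /* updated by other threads as code is compiled */
} codeinst_sketch_t;

static int classify(codeinst_sketch_t *ci, fptr_t const_return, fptr_t sparam)
{
    fptr_t invoke = jl_atomic_load_relaxed(&ci->invoke);   /* single racy read */
    if (invoke == const_return)
        return 1;   /* constant-return case */
    else if (invoke != sparam)
        return 2;   /* a specptr we know how to handle */
    return 0;
}
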
(19 additional changed files not shown)
