update atomics.h to be more fully C11/C++11 compliant
Fixes #42098
vtjnash committed Sep 9, 2021
1 parent bf5ac8c commit 3e7f980
Showing 22 changed files with 425 additions and 245 deletions.
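At a high level, the atomics.h rewrite below replaces Julia's GCC-specific __atomic_* builtins with the standard C11 <stdatomic.h> / C++11 <atomic> interfaces, so a single header serves both languages. A minimal sketch of that shared-header pattern follows; the struct and field names are illustrative and not taken from the diff:

/* Sketch only: one header usable from C11 and C++11 translation units,
   using the same _Atomic(T) -> std::atomic<T> mapping as the diff below. */
#ifdef __cplusplus
#  include <atomic>
#  define _Atomic(T) std::atomic<T>
extern "C" {
#else
#  include <stdatomic.h>
#endif

typedef struct {
    _Atomic(int) refcount;   /* intended to have the same layout under both compilers */
} example_obj_t;             /* illustrative name, not from the commit */

#ifdef __cplusplus
}
#endif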
220 changes: 183 additions & 37 deletions src/atomics.h
@@ -3,22 +3,44 @@
#ifndef JL_ATOMICS_H
#define JL_ATOMICS_H

// Low-level atomic operations

#if defined(__i386__) && defined(__GNUC__) && !defined(__SSE2__)
# error Julia can only be built for architectures above Pentium 4. Pass -march=pentium4, or set MARCH=pentium4 and ensure that -march is not passed separately with an older architecture.
#endif
#ifdef _COMPILER_MICROSOFT_
# include <intrin.h>
# include <type_traits>

// Low-level atomic operations
#ifdef __cplusplus
#include <atomic>
using std::memory_order_relaxed;
using std::memory_order_consume;
using std::memory_order_acquire;
using std::memory_order_release;
using std::memory_order_acq_rel;
using std::memory_order_seq_cst;
using std::atomic_thread_fence;
using std::atomic_signal_fence;
using std::atomic_load;
using std::atomic_load_explicit;
using std::atomic_store;
using std::atomic_store_explicit;
using std::atomic_fetch_add;
using std::atomic_fetch_add_explicit;
using std::atomic_fetch_and;
using std::atomic_fetch_and_explicit;
using std::atomic_fetch_or;
using std::atomic_fetch_or_explicit;
using std::atomic_compare_exchange_strong;
using std::atomic_compare_exchange_strong_explicit;
using std::atomic_exchange;
using std::atomic_exchange_explicit;
extern "C" {
#define _Atomic(T) std::atomic<T>
#else
#include <stdatomic.h>
#endif

#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
# include <immintrin.h>
#endif
#ifndef _OS_WINDOWS_
# include <pthread.h>
#endif
#include <signal.h>

enum jl_memory_order {
jl_memory_order_unspecified = -2,
@@ -50,73 +72,161 @@ enum jl_memory_order {
* are). We also need to access these atomic variables from the LLVM JIT code
* which is very hard unless the layout of the object is fully specified.
*/
#define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
#define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
#define jl_fence() atomic_thread_fence(memory_order_seq_cst)
#define jl_fence_release() atomic_thread_fence(memory_order_release)
#define jl_signal_fence() atomic_signal_fence(memory_order_seq_cst)

#ifdef __cplusplus
}
// implicit conversion wasn't correctly specified until 2017, so many compilers
// get this wrong; thus we include the correct definitions here (with implicit
// conversion), instead of using the macro version
template<class T>
T jl_atomic_load(std::atomic<T> *ptr)
{
return std::atomic_load<T>(ptr);
}
template<class T>
T jl_atomic_load_explicit(std::atomic<T> *ptr, std::memory_order order)
{
return std::atomic_load_explicit<T>(ptr, order);
}
#define jl_atomic_load_relaxed(ptr) jl_atomic_load_explicit(ptr, memory_order_relaxed)
#define jl_atomic_load_acquire(ptr) jl_atomic_load_explicit(ptr, memory_order_acquire)
template<class T, class S>
void jl_atomic_store(std::atomic<T> *ptr, S desired)
{
std::atomic_store<T>(ptr, desired);
}
template<class T, class S>
void jl_atomic_store_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
{
std::atomic_store_explicit<T>(ptr, desired, order);
}
#define jl_atomic_store_relaxed(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_relaxed)
#define jl_atomic_store_release(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_release)
template<class T, class S>
T jl_atomic_fetch_add(std::atomic<T> *ptr, S val)
{
return std::atomic_fetch_add<T>(ptr, val);
}
template<class T, class S>
T jl_atomic_fetch_add_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
{
return std::atomic_fetch_add_explicit<T>(ptr, val, order);
}
#define jl_atomic_fetch_add_relaxed(ptr, val) jl_atomic_fetch_add_explicit(ptr, val, memory_order_relaxed)
template<class T, class S>
T jl_atomic_fetch_and(std::atomic<T> *ptr, S val)
{
return std::atomic_fetch_and<T>(ptr, val);
}
template<class T, class S>
T jl_atomic_fetch_and_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
{
return std::atomic_fetch_and_explicit<T>(ptr, val, order);
}
#define jl_atomic_fetch_and_relaxed(ptr, val) jl_atomic_fetch_and_explicit(ptr, val, memory_order_relaxed)
template<class T, class S>
T jl_atomic_fetch_or(std::atomic<T> *ptr, S val)
{
return std::atomic_fetch_or<T>(ptr, val);
}
template<class T, class S>
T jl_atomic_fetch_or_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
{
return std::atomic_fetch_or_explicit<T>(ptr, val, order);
}
#define jl_atomic_fetch_or_relaxed(ptr, val) jl_atomic_fetch_or_explicit(ptr, val, memory_order_relaxed)
template<class T, class S>
bool jl_atomic_cmpswap(std::atomic<T> *ptr, T *expected, S val)
{
return std::atomic_compare_exchange_strong<T>(ptr, expected, val);
}
template<class T, class S>
bool jl_atomic_cmpswap_explicit(std::atomic<T> *ptr, T *expected, S val, std::memory_order order)
{
return std::atomic_compare_exchange_strong_explicit<T>(ptr, expected, val, order, order);
}
#define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed)
template<class T, class S>
T jl_atomic_exchange(std::atomic<T> *ptr, S desired)
{
return std::atomic_exchange<T>(ptr, desired);
}
template<class T, class S>
T jl_atomic_exchange_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
{
return std::atomic_exchange_explicit<T>(ptr, desired, order);
}
#define jl_atomic_exchange_relaxed(ptr, val) jl_atomic_exchange_explicit(ptr, val, memory_order_relaxed)
extern "C" {
#else

# define jl_atomic_fetch_add_relaxed(obj, arg) \
__atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
atomic_fetch_add_explicit(obj, arg, memory_order_relaxed)
# define jl_atomic_fetch_add(obj, arg) \
__atomic_fetch_add(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_add_fetch(obj, arg) \
__atomic_add_fetch(obj, arg, __ATOMIC_SEQ_CST)
atomic_fetch_add(obj, arg)
# define jl_atomic_fetch_and_relaxed(obj, arg) \
__atomic_fetch_and(obj, arg, __ATOMIC_RELAXED)
atomic_fetch_and_explicit(obj, arg, memory_order_relaxed)
# define jl_atomic_fetch_and(obj, arg) \
__atomic_fetch_and(obj, arg, __ATOMIC_SEQ_CST)
atomic_fetch_and(obj, arg)
# define jl_atomic_fetch_or_relaxed(obj, arg) \
__atomic_fetch_or(obj, arg, __ATOMIC_RELAXED)
atomic_fetch_or_explicit(obj, arg, memory_order_relaxed)
# define jl_atomic_fetch_or(obj, arg) \
__atomic_fetch_or(obj, arg, __ATOMIC_SEQ_CST)
# define jl_atomic_cmpswap(obj, expected, desired) \
__atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
# define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
__atomic_compare_exchange_n(obj, expected, desired, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)
atomic_fetch_or(obj, arg)
# define jl_atomic_cmpswap(obj, expected, desired) \
atomic_compare_exchange_strong(obj, expected, desired)
# define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed)
// TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
# define jl_atomic_exchange(obj, desired) \
__atomic_exchange_n(obj, desired, __ATOMIC_SEQ_CST)
# define jl_atomic_exchange(obj, desired) \
atomic_exchange(obj, desired)
# define jl_atomic_exchange_relaxed(obj, desired) \
__atomic_exchange_n(obj, desired, __ATOMIC_RELAXED)
atomic_exchange_explicit(obj, desired, memory_order_relaxed)
# define jl_atomic_store(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
atomic_store(obj, val)
# define jl_atomic_store_relaxed(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_RELAXED)
atomic_store_explicit(obj, val, memory_order_relaxed)

# if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
!(defined(_CPU_X86_) || defined(_CPU_X86_64_))
// ICC and Clang don't have this bug...
# define jl_atomic_store_release(obj, val) \
__atomic_store_n(obj, val, __ATOMIC_RELEASE)
atomic_store_explicit(obj, val, memory_order_release)
# else
// Workaround a GCC bug when using store with release order by using the
// stronger version instead.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67458
// fixed in https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=d8c40eff56f69877b33c697ded756d50fde90c27
# define jl_atomic_store_release(obj, val) do { \
jl_signal_fence(); \
__atomic_store_n(obj, val, __ATOMIC_RELEASE); \
atomic_store_explicit(obj, val, memory_order_release); \
} while (0)
# endif
# define jl_atomic_load(obj) \
__atomic_load_n(obj, __ATOMIC_SEQ_CST)
atomic_load(obj)
# define jl_atomic_load_acquire(obj) \
__atomic_load_n(obj, __ATOMIC_ACQUIRE)
atomic_load_explicit(obj, memory_order_acquire)
#ifdef _COMPILER_TSAN_ENABLED_
// For the sake of tsan, call these loads consume ordering since they will act
// as such on the processors we support, while normally the compiler would
// upgrade this to acquire ordering, which is stronger (and slower) than we want.
# define jl_atomic_load_relaxed(obj) \
__atomic_load_n(obj, __ATOMIC_CONSUME)
atomic_load_explicit(obj, memory_order_consume)
#else
# define jl_atomic_load_relaxed(obj) \
__atomic_load_n(obj, __ATOMIC_RELAXED)
atomic_load_explicit(obj, memory_order_relaxed)
#endif
#endif

#ifdef __clang_analyzer__
// for the purposes of the analyzer, we can turn these into non-atomic expressions with similar properties
// (for the sake of the analyzer, we don't care if it is an exact match for behavior)

#undef _Atomic
#define _Atomic(T) T

#undef jl_atomic_exchange
#undef jl_atomic_exchange_relaxed
#define jl_atomic_exchange(obj, desired) \
@@ -135,11 +245,12 @@ enum jl_memory_order {
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
__typeof__((expected)) x__analyzer__ = (expected); \
if (temp__analyzer__ == *x__analyzer__) \
int eq__analyzer__ = memcmp(&temp__analyzer__, x__analyzer__, sizeof(temp__analyzer__)) == 0; \
if (eq__analyzer__) \
*p__analyzer__ = (desired); \
else \
*x__analyzer__ = temp__analyzer__; \
temp__analyzer__ == *x__analyzer__; \
eq__analyzer__; \
}))
#define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap

Expand All @@ -157,7 +268,42 @@ enum jl_memory_order {
#define jl_atomic_load_acquire jl_atomic_load
#define jl_atomic_load_relaxed jl_atomic_load

#undef jl_atomic_fetch_add
#undef jl_atomic_fetch_and
#undef jl_atomic_fetch_or
#undef jl_atomic_fetch_add_relaxed
#undef jl_atomic_fetch_and_relaxed
#undef jl_atomic_fetch_or_relaxed
#define jl_atomic_fetch_add(obj, val) \
(__extension__({ \
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
*(p__analyzer__) = temp__analyzer__ + (val); \
temp__analyzer__; \
}))
#define jl_atomic_fetch_and(obj, val) \
(__extension__({ \
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
*(p__analyzer__) = temp__analyzer__ & (val); \
temp__analyzer__; \
}))
#define jl_atomic_fetch_or(obj, val) \
(__extension__({ \
__typeof__((obj)) p__analyzer__ = (obj); \
__typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
*(p__analyzer__) = temp__analyzer__ | (val); \
temp__analyzer__; \
}))
#define jl_atomic_fetch_add_relaxed jl_atomic_fetch_add
#define jl_atomic_fetch_and_relaxed jl_atomic_fetch_and
#define jl_atomic_fetch_or_relaxed jl_atomic_fetch_or

#endif


#ifdef __cplusplus
}
#endif

#endif // JL_ATOMICS_H
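The jl_atomic_cmpswap wrappers above keep the standard compare-exchange semantics: on failure, atomic_compare_exchange_strong writes the current value back into *expected. A small usage sketch against these wrappers, assuming the macros from src/atomics.h are in scope; the counter and function are illustrative, not part of the commit:

/* Usage sketch only; relies on the jl_atomic_* macros defined above. */
static _Atomic(int) counter;   /* illustrative counter */

static int bump_if_nonnegative(void)
{
    int expected = jl_atomic_load_relaxed(&counter);
    while (expected >= 0) {
        /* On failure, jl_atomic_cmpswap refreshes `expected` with the current
           value (atomic_compare_exchange_strong semantics), so the loop retries. */
        if (jl_atomic_cmpswap(&counter, &expected, expected + 1))
            return 1;   /* incremented */
    }
    return 0;           /* gave up: counter went negative */
}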
4 changes: 2 additions & 2 deletions src/cgmemmgr.cpp
@@ -205,7 +205,7 @@ static intptr_t get_anon_hdl(void)
return -1;
}

static size_t map_offset = 0;
static _Atomic(size_t) map_offset{0};
// Multiple of 128MB.
// Hopefully no one will set a ulimit for this to be a problem...
static constexpr size_t map_size_inc_default = 128 * 1024 * 1024;
@@ -239,7 +239,7 @@ static intptr_t init_shared_map()
anon_hdl = get_anon_hdl();
if (anon_hdl == -1)
return -1;
map_offset = 0;
jl_atomic_store_relaxed(&map_offset, 0);
map_size = get_map_size_inc();
int ret = ftruncate(anon_hdl, map_size);
if (ret != 0) {
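The cgmemmgr.cpp change turns map_offset into an _Atomic(size_t) and resets it with jl_atomic_store_relaxed. A hedged sketch of the bump-reservation pattern such a counter typically backs; the function, its name, and the missing bounds handling are illustrative, not the file's actual allocator:

/* Sketch only: reserve a region by bumping an _Atomic(size_t) offset.
   Relaxed ordering suffices when the counter is only a reservation ticket
   and the mapped memory itself is published through other synchronization. */
static _Atomic(size_t) example_offset;   /* stands in for map_offset */

static size_t reserve_region(size_t alloc_size)
{
    /* Returns the old offset; the caller owns [old, old + alloc_size). */
    return jl_atomic_fetch_add_relaxed(&example_offset, alloc_size);
}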
28 changes: 17 additions & 11 deletions src/codegen.cpp
@@ -3674,8 +3674,9 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
else {
jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here
jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
if (ci != jl_nothing && codeinst->invoke != jl_fptr_sparam) { // check if we know we definitely can't handle this specptr
if (codeinst->invoke == jl_fptr_const_return) {
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
if (ci != jl_nothing && invoke != jl_fptr_sparam) { // check if we know we definitely can't handle this specptr
if (invoke == jl_fptr_const_return) {
result = mark_julia_const(codeinst->rettype_const);
handled = true;
}
@@ -3688,9 +3689,11 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
if (ctx.use_cache) {
// optimization: emit the correct name immediately, if we know it
// TODO: use `emitted` map here too to try to consolidate names?
if (codeinst->specptr.fptr) {
if (specsig ? codeinst->isspecsig : codeinst->invoke == jl_fptr_args) {
protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (fptr) {
if (specsig ? codeinst->isspecsig : invoke == jl_fptr_args) {
protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
need_to_emit = false;
}
}
@@ -5059,8 +5062,9 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
ctx.builder.SetInsertPoint(b0);
Function *theFunc;
Value *theFarg;
if (params.cache && codeinst->invoke != NULL) {
StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->invoke, codeinst);
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
if (params.cache && invoke != NULL) {
StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst);
theFunc = cast<Function>(
M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(jl_LLVMContext)).getCallee());
theFarg = literal_pointer_val(ctx, (jl_value_t*)codeinst);
@@ -7820,12 +7824,14 @@ void jl_compile_workqueue(
"invalid world for code-instance");
StringRef preal_decl = "";
bool preal_specsig = false;
if (params.cache && codeinst->invoke != NULL) {
if (codeinst->invoke == jl_fptr_args) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
if (params.cache && invoke != NULL) {
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (invoke == jl_fptr_args) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
}
else if (codeinst->isspecsig) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
preal_specsig = true;
}
}
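The codegen.cpp edits all follow one pattern: read a concurrently updated field such as codeinst->invoke or codeinst->specptr.fptr exactly once with jl_atomic_load_relaxed, keep it in a local, and branch on that local, so the checked value cannot change between the test and the use. A self-contained sketch of that load-once pattern; the struct and callback type are hypothetical:

typedef void (*callback_t)(void);        /* hypothetical function-pointer type */

typedef struct {
    _Atomic(callback_t) invoke;          /* stands in for codeinst->invoke */
} example_entry_t;

static void run_if_set(example_entry_t *e)
{
    /* Load once; reuse the local instead of re-reading a racing field. */
    callback_t invoke = jl_atomic_load_relaxed(&e->invoke);
    if (invoke != NULL)
        invoke();   /* calls the value that was checked, not a fresh re-read */
}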