|
| 1 | +/* ---------------------------------------------------------------------------- |
| 2 | +Copyright (c) 2018-2021 Microsoft Research, Daan Leijen |
| 3 | +This is free software; you can redistribute it and/or modify it under the |
| 4 | +terms of the MIT license. A copy of the license can be found in the file |
| 5 | +"LICENSE" at the root of this distribution. |
| 6 | +-----------------------------------------------------------------------------*/ |
| 7 | +#pragma once |
| 8 | +#ifndef MIMALLOC_ATOMIC_H |
| 9 | +#define MIMALLOC_ATOMIC_H |
| 10 | + |
| 11 | +// -------------------------------------------------------------------------------------------- |
| 12 | +// Atomics |
| 13 | +// We need to be portable between C, C++, and MSVC. |
// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
| 15 | +// This is why we try to use only `uintptr_t` and `<type>*` as atomic types. |
| 16 | +// To gain better insight in the range of used atomics, we use explicitly named memory order operations |
| 17 | +// instead of passing the memory order as a parameter. |
| 18 | +// ----------------------------------------------------------------------------------------------- |
| 19 | + |
#if defined(__cplusplus)
// Use C++ atomics
#include <atomic>
#define _Atomic(tp) std::atomic<tp>
#define mi_atomic(name) std::atomic_##name
#define mi_memory_order(name) std::memory_order_##name
// ATOMIC_VAR_INIT is deprecated in C++20; plain initialization suffices there.
#if !defined(ATOMIC_VAR_INIT) || (__cplusplus >= 202002L) // c++20, see issue #571
  #define MI_ATOMIC_VAR_INIT(x) x
#else
  #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
#elif defined(_MSC_VER)
// Use the MSVC C wrapper for C11 atomics (implemented further below with Interlocked intrinsics)
#define _Atomic(tp) tp
#define MI_ATOMIC_VAR_INIT(x) x
#define mi_atomic(name) mi_atomic_##name
#define mi_memory_order(name) mi_memory_order_##name
#else
// Use C11 atomics
#include <stdatomic.h>
#define mi_atomic(name) atomic_##name
#define mi_memory_order(name) memory_order_##name
#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x)
#endif
| 44 | + |
// Various defines for all used memory orders in mimalloc.
// On failure, a weak/strong CAS updates `expected` with the value that was observed.
#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail) \
  mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail)

#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail) \
  mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail)

#define mi_atomic_load_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_load_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_store_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_store_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_exchange_release(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_exchange_acq_rel(p,x) mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_cas_weak_release(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_weak_acq_rel(p,exp,des) mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
#define mi_atomic_cas_strong_release(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
#define mi_atomic_cas_strong_acq_rel(p,exp,des) mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))

// Fetch-and-op wrappers; each returns the previous value.
#define mi_atomic_add_relaxed(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_sub_relaxed(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
#define mi_atomic_add_acq_rel(p,x) mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_sub_acq_rel(p,x) mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_and_acq_rel(p,x) mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
#define mi_atomic_or_acq_rel(p,x) mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))

#define mi_atomic_increment_relaxed(p) mi_atomic_add_relaxed(p,(uintptr_t)1)
#define mi_atomic_decrement_relaxed(p) mi_atomic_sub_relaxed(p,(uintptr_t)1)
#define mi_atomic_increment_acq_rel(p) mi_atomic_add_acq_rel(p,(uintptr_t)1)
#define mi_atomic_decrement_acq_rel(p) mi_atomic_sub_acq_rel(p,(uintptr_t)1)

// Forward declarations; definitions appear at the end of this file.
static inline void mi_atomic_yield(void);
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add);
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
| 78 | + |
| 79 | + |
#if defined(__cplusplus) || !defined(_MSC_VER)

// In C++/C11 atomics we have polymorphic atomics so can use the typed `ptr` variants (where `tp` is the type of atomic value)
// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well
// (the `tp` argument is unused here; the MSVC branch below needs it for casts).
#define mi_atomic_load_ptr_acquire(tp,p) mi_atomic_load_acquire(p)
#define mi_atomic_load_ptr_relaxed(tp,p) mi_atomic_load_relaxed(p)

// In C++ we need to add casts to help resolve templates if NULL is passed
#if defined(__cplusplus)
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,(tp*)x)
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,(tp*)x)
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,(tp*)des)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,(tp*)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,(tp*)x)
#else
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release(p,x)
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed(p,x)
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release(p,exp,des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel(p,exp,des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release(p,exp,des)
#define mi_atomic_exchange_ptr_release(tp,p,x) mi_atomic_exchange_release(p,x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) mi_atomic_exchange_acq_rel(p,x)
#endif
| 105 | + |
// These are used by the statistics
// Atomically add `add` to `*p` with relaxed ordering; returns the previous value.
static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
  return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
}
| 110 | +static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { |
| 111 | + int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p); |
| 112 | + while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, ¤t, x)) { /* nothing */ }; |
| 113 | +} |
| 114 | + |
// Used by timers
// (C11/C++ atomics are polymorphic, so the generic load/store work for int64 too.)
#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
| 120 | + |
| 121 | + |
| 122 | + |
#elif defined(_MSC_VER)

// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics.
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#include <intrin.h>
#ifdef _WIN64
typedef LONG64 msc_intptr_t;
#define MI_64(f) f##64   // select the 64-bit Interlocked variant, e.g. _InterlockedExchange64
#else
typedef LONG msc_intptr_t;
#define MI_64(f) f       // 32-bit: the plain Interlocked functions operate on LONG
#endif

// Mirror of the C11 `memory_order` enumerators; the wrappers below accept but
// ignore the requested order (they always use the Interlocked intrinsics).
typedef enum mi_memory_order_e {
  mi_memory_order_relaxed,
  mi_memory_order_consume,
  mi_memory_order_acquire,
  mi_memory_order_release,
  mi_memory_order_acq_rel,
  mi_memory_order_seq_cst
} mi_memory_order;
| 145 | + |
// Fetch-and-add; returns the previous value.
static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
}
// Fetch-and-subtract, implemented as an add of the negated value; returns the previous value.
static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
}
// Fetch-and-AND; returns the previous value.
static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
// Fetch-and-OR; returns the previous value.
static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
}
// C11-style strong CAS: on failure, `*expected` is updated with the observed value.
static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
  (void)(mo1); (void)(mo2);
  uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
  if (read == *expected) {
    return true;
  }
  else {
    *expected = read;
    return false;
  }
}
// Weak CAS: Interlocked compare-exchange never fails spuriously, so the
// strong version satisfies the weak contract as well.
static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
  return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2);
}
// Atomic exchange; returns the previous value.
static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) {
  (void)(mo);
  return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
}
// Model a thread fence with a dummy interlocked exchange on a local
// (the Interlocked operation serves as the ordering point).
static inline void mi_atomic_thread_fence(mi_memory_order mo) {
  (void)(mo);
  _Atomic(uintptr_t) x = 0;
  mi_atomic_exchange_explicit(&x, 1, mo);
}
| 185 | +static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) { |
| 186 | + (void)(mo); |
| 187 | +#if defined(_M_IX86) || defined(_M_X64) |
| 188 | + return *p; |
| 189 | +#else |
| 190 | + uintptr_t x = *p; |
| 191 | + if (mo > mi_memory_order_relaxed) { |
| 192 | + while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; |
| 193 | + } |
| 194 | + return x; |
| 195 | +#endif |
| 196 | +} |
// Atomic store.
static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
  (void)(mo);
#if defined(_M_IX86) || defined(_M_X64)
  // x86/x64: an aligned native-width store; plain assignment is used here.
  *p = x;
#else
  // other architectures: use an exchange to get an ordered store
  mi_atomic_exchange_explicit(p, x, mo);
#endif
}
// Atomic 64-bit load.
static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) {
  (void)(mo);
#if defined(_M_X64)
  // aligned 64-bit loads are atomic on x64
  return *p;
#else
  // 32-bit: read the 64-bit value atomically via a compare-exchange loop
  // (exchanging the value with itself until the read is consistent)
  int64_t old = *p;
  int64_t x = old;
  while ((old = InterlockedCompareExchange64(p, x, old)) != x) {
    x = old;
  }
  return x;
#endif
}
| 218 | +static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) { |
| 219 | + (void)(mo); |
| 220 | +#if defined(x_M_IX86) || defined(_M_X64) |
| 221 | + *p = x; |
| 222 | +#else |
| 223 | + InterlockedExchange64(p, x); |
| 224 | +#endif |
| 225 | +} |
| 226 | + |
// These are used by the statistics
// Atomically add `add` to `*p`; returns the previous value.
static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) {
#ifdef _WIN64
  // on win64 intptr_t is 64-bit, so reuse the signed pointer-sized add
  return (int64_t)mi_atomic_addi((int64_t*)p, add);
#else
  // 32-bit: emulate 64-bit fetch-add with a compare-exchange loop
  int64_t current;
  int64_t sum;
  do {
    current = *p;
    sum = current + add;
  } while (_InterlockedCompareExchange64(p, sum, current) != current);
  return current;
#endif
}
// Atomically update `*p` to the maximum of its current value and `x` (monotonic).
static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) {
  int64_t current;
  do {
    current = *p;
  } while (current < x && _InterlockedCompareExchange64(p, x, current) != current);
}
| 247 | + |
// The pointer macros cast to `uintptr_t`.
// (MSVC C mode has no polymorphic atomics, so pointer atomics are routed
// through the uintptr_t operations; `tp` supplies the result cast.)
#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p))
#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p))
#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x))
#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des)
#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x)
#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x)

// Used by timers (route to the dedicated 64-bit load/store wrappers above).
#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire))
#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed))
#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release))
#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed))
| 263 | + |
| 264 | + |
| 265 | +#endif |
| 266 | + |
| 267 | + |
// Atomically add a signed value; returns the previous value.
static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) {
  // two's complement: an unsigned add produces the same bit pattern as a signed add
  return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add);
}

// Atomically subtract a signed value; returns the previous value.
static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
  return (intptr_t)mi_atomic_addi(p, -sub);
}
| 277 | + |
// Yield the processor: used in spin loops to hint the CPU/OS that we are waiting.
#if defined(__cplusplus)
#include <thread>
static inline void mi_atomic_yield(void) {
  std::this_thread::yield();
}
#elif defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline void mi_atomic_yield(void) {
  YieldProcessor();
}
#elif defined(__SSE2__)
#include <emmintrin.h>
static inline void mi_atomic_yield(void) {
  _mm_pause();
}
#elif (defined(__GNUC__) || defined(__clang__)) && \
      (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \
       defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
#if defined(__x86_64__) || defined(__i386__)
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("pause" ::: "memory");
}
#elif defined(__aarch64__)
static inline void mi_atomic_yield(void) {
  __asm__ volatile("wfe");
}
#elif (defined(__arm__) && __ARM_ARCH >= 7)
// fix: GCC/clang predefine `__ARM_ARCH` (no trailing underscores); the previous
// test of `__ARM_ARCH__` was always false, so ARMv7+ silently fell through to
// the `nop` fallback below.
static inline void mi_atomic_yield(void) {
  __asm__ volatile("yield" ::: "memory");
}
#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
static inline void mi_atomic_yield(void) {
  __asm__ __volatile__ ("or 27,27,27" ::: "memory");  // PowerPC "yield" priority hint
}
#elif defined(__armel__) || defined(__ARMEL__)
static inline void mi_atomic_yield(void) {
  __asm__ volatile ("nop" ::: "memory");
}
#endif
#elif defined(__sun)
// Solaris/illumos
#include <synch.h>
static inline void mi_atomic_yield(void) {
  smt_pause();
}
#elif defined(__wasi__)
#include <sched.h>
static inline void mi_atomic_yield(void) {
  sched_yield();
}
#else
// Fallback for other platforms
#include <unistd.h>
static inline void mi_atomic_yield(void) {
  sleep(0);
}
#endif
| 336 | + |
| 337 | + |
#endif // MIMALLOC_ATOMIC_H
0 commit comments