bpo-30747: Attempt to fix atomic load/store #2383
@@ -10,6 +10,12 @@
#include <stdatomic.h>
#endif

#if defined(_MSC_VER)
#include <intrin.h>
#include <immintrin.h>
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.

@@ -87,8 +93,9 @@ typedef struct _Py_atomic_int {
    || (ORDER) == __ATOMIC_CONSUME), \
   __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))

#else

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,

@@ -105,9 +112,6 @@ typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) for now */
#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64))

static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)

@@ -127,7 +131,7 @@ _Py_atomic_thread_fence(_Py_memory_order order)
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address; /* shut up -Wunused-parameter */
    (void)address; /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:

@@ -219,7 +223,291 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
        result; \
    })

#else /* !gcc x86 */
#elif defined(_MSC_VER)
/* _Interlocked* functions provide a full memory barrier and are therefore
   enough for acq_rel and seq_cst. If the HLE variants aren't available
   in hardware they will fall back to a full memory barrier as well.

   This might affect performance, but likely only in some very specific and
   hard-to-measure scenarios.
*/
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
Comment: Ok, I think the load and store macros are ok as defined, but that's because MSDN says "Most of the interlocked functions provide full memory barriers on all Windows platforms". Perhaps you can add a comment to that effect?

Reply: Will do.
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \

Comment: Is it a runtime check whether HLE is available or not, or is it a compile-time check? If compile-time, it means we may mistakenly produce builds that won't run on non-HLE processors. @zooba

Reply: According to the documentation these will use HLEAcquire if the CPU supports it, or use a regular _InterlockedExchange64. Docs here: https://msdn.microsoft.com/en-us/library/1s26w950.aspx, specifically this bit:

Comment: I understand that. My question is whether the CPU detection happens at compile time or at run time. I can't find any information online...

Reply: There's no detection happening. Intel seems to have found a way to make it backwards compatible, so older CPUs will ignore the extra hint. I tested this quickly and here's the generated assembly for the HLE and non-HLE versions:

Comment: That's great, thank you!

    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif
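The thread above leans on MSDN's note that the plain _Interlocked* intrinsics act as full memory barriers, which is why the default branch of this macro is considered strong enough for acq_rel and seq_cst. A rough equivalence sketch in C11 terms (not part of the patch; the helper name is hypothetical):

#include <stdatomic.h>

/* What the default branch of the macro above relies on: on Windows an
   _InterlockedExchange64(target, v) is at least as strong as this
   sequentially consistent exchange. */
static inline void store_seq_cst_equiv(_Atomic long long *target, long long v)
{
    atomic_exchange_explicit(target, v, memory_order_seq_cst);
}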

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  }
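For the HLE question in the thread above, a minimal MSVC sketch (hypothetical, not the reviewer's original test) that can be compiled on x64 with "cl /c /FAs hle_test.c" to compare the code generated for the HLE and non-HLE exchanges; as the reviewer notes, the HLE hint is encoded as an extra prefix that older CPUs simply ignore:

#include <intrin.h>
#include <immintrin.h>

volatile __int64 slot;

void store_hle(__int64 v)
{
    /* HLE-hinted exchange: same locked exchange plus an ignorable prefix. */
    _InterlockedExchange64_HLEAcquire(&slot, v);
}

void store_plain(__int64 v)
{
    /* Plain full-barrier exchange. */
    _InterlockedExchange64(&slot, v);
}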

#if defined(_M_X64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returns
   a uintptr_t it will do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange64_HLEAcquire(value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange64_HLERelease(value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange64(value, old, old) != old);
        break;
    }
    }
    return old;
}
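The intptr_t return type matters because of the -1 sentinel mentioned in the comment above. A small standalone sketch (hypothetical, not part of the patch) of the kind of check that breaks when the loaded value is treated as unsigned:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uintptr_t stored = (uintptr_t)-1;          /* "not created" sentinel */

    intptr_t as_signed = (intptr_t)stored;     /* what this loader returns */
    printf("%d\n", as_signed >= 0);            /* 0: sentinel is detected */

    /* If the load returned uintptr_t instead, a ">= 0" style check would
       always be true and the sentinel would be missed. */
    printf("%d\n", stored >= 0);               /* 1: unsigned compare */
    return 0;
}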

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif

inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange_HLEAcquire(value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange_HLERelease(value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange(value, old, old) != old);
        break;
    }
    }
    return old;
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof(*ATOMIC_VAL._value) == 8) { \
    _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof(*(ATOMIC_VAL._value)) == 8 ? \
    _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
    _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
  )
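The sizeof test above picks the 32-bit or 64-bit Interlocked path at compile time from the width of the _value member. A standalone sketch (hypothetical, not part of the patch) showing the widths it keys on:

#include <stdint.h>
#include <stdio.h>

typedef struct { volatile uintptr_t _value; } _Py_atomic_address;  /* 8 bytes on x64 */
typedef struct { volatile int _value; } _Py_atomic_int;            /* 4 bytes */

int main(void)
{
    _Py_atomic_address addr;
    _Py_atomic_int flag;
    /* 8 -> _Py_atomic_store_64bit / _Py_atomic_load_64bit path,
       4 -> _Py_atomic_store_32bit / _Py_atomic_load_32bit path.
       On 32-bit x86 both are 4, matching the ((void)0) stub above. */
    printf("%zu %zu\n", sizeof(addr._value), sizeof(flag._value));
    return 0;
}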
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returns
   a uintptr_t it will do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange64_acq(value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange64_rel(value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange64(value, old, old) != old);
        break;
    }
    }
    return old;
}

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif

inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange_acq(value, old, old) != old);
        break;
    }
    case _Py_memory_order_release:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange_rel(value, old, old) != old);
        break;
    }
    case _Py_memory_order_relaxed:
        old = *value;
        break;
    default:
    {
        do {
            old = *value;
        } while (_InterlockedCompareExchange(value, old, old) != old);
        break;
    }
    }
    return old;
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof(*ATOMIC_VAL._value) == 8) { \
    _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof(*(ATOMIC_VAL._value)) == 8 ? \
    _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
    _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
  )
#endif
#else /* !gcc x86 !_msc_ver */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic. This is false, so people should port
   this. */

@@ -229,8 +517,6 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)

#endif /* !gcc x86 */
#endif

/* Standardized shortcuts. */

@@ -245,6 +531,5 @@ _Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)

#endif /* Py_BUILD_CORE */
#endif /* Py_ATOMIC_H */
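A usage sketch for the standardized shortcuts above (hypothetical caller code, assuming the header from this diff; the flag name is made up, and the pointer-argument style follows the GCC and fallback branches):

#include "pyatomic.h"   /* hypothetical include path */

static _Py_atomic_int pending_signal;

static void set_pending(void)
{
    /* Release store: earlier writes become visible before the flag flips. */
    _Py_atomic_store_explicit(&pending_signal, 1, _Py_memory_order_release);
}

static int check_pending(void)
{
    /* Relaxed load is enough for a cheap polling check. */
    return _Py_atomic_load_relaxed(&pending_signal);
}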
@@ -0,0 +1,2 @@
Add a non-dummy implementation of _Py_atomic_store and _Py_atomic_load on
MSVC.
Comment: Perhaps remove this comment?

Reply: I was thinking of updating it to reflect the current state.

Comment: Looks like you forgot to update it :-)

Reply: Fixed now.