Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
60884ab
Atomic CAS with pad (#23, P0528R3)
AlexGuteniev Jul 10, 2020
edd535b
formatting
AlexGuteniev Jul 10, 2020
c3c6c1c
Address review comments
AlexGuteniev Jul 11, 2020
00f0ce4
more review comments
AlexGuteniev Jul 11, 2020
2b635e5
Disable for EDG too
AlexGuteniev Jul 11, 2020
facabae
Skip test for clang, also more interesting bits
AlexGuteniev Jul 11, 2020
c072791
TRANSITION, LLVM-46685
AlexGuteniev Jul 11, 2020
2250eec
Improve TRANSITION comment
AlexGuteniev Jul 11, 2020
cec43ef
Don't use has_unique_object_representations_v
AlexGuteniev Jul 12, 2020
c6a7614
longer
AlexGuteniev Jul 12, 2020
de3b54d
centralize preprocessor
AlexGuteniev Jul 15, 2020
979cac7
test cleanup
AlexGuteniev Jul 15, 2020
56793a7
undef
AlexGuteniev Jul 15, 2020
dcdfa48
Update stl/inc/atomic
AlexGuteniev Jul 15, 2020
97076e4
Update stl/inc/atomic
AlexGuteniev Jul 15, 2020
1a3e44e
delete the remaining &
AlexGuteniev Jul 15, 2020
465ebe1
Don't penalize normal atomics in debug mode
AlexGuteniev Jul 17, 2020
ccbf24b
non-chained
AlexGuteniev Jul 17, 2020
ef818b7
for
AlexGuteniev Jul 17, 2020
d9620b0
+zero case
AlexGuteniev Jul 17, 2020
69eb997
clang format
AlexGuteniev Jul 17, 2020
1662f1c
finish up zero case
AlexGuteniev Jul 17, 2020
0a35a47
Review comments on non-lock-free case
AlexGuteniev Jul 18, 2020
9d96b8e
-addressof
AlexGuteniev Jul 18, 2020
00e7416
more sizes
AlexGuteniev Jul 18, 2020
2c05e4f
missing macro wrap
AlexGuteniev Jul 18, 2020
027e7b9
Merge remote-tracking branch 'origin/master' into cmpxchg
BillyONeal Jul 20, 2020
1b75b4e
Fix compare_exchange when _Ty has a nontrivial default ctor using _St…
BillyONeal Jul 20, 2020
76de2ad
Fix STL CR comments.
BillyONeal Jul 21, 2020
5280628
Update tests/std/tests/P0528R3_cmpxchg_pad/test.cpp
BillyONeal Jul 21, 2020
c2feae9
Update stl/inc/atomic
BillyONeal Jul 21, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 151 additions & 7 deletions stl/inc/atomic
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,52 @@ _NODISCARD extern "C" bool __cdecl __std_atomic_has_cmpxchg16b() noexcept;
#define ATOMIC_LLONG_LOCK_FREE 2
#define ATOMIC_POINTER_LOCK_FREE 2

// Padding bits should not participate in cmpxchg comparison starting in C++20.
// Clang does not have __builtin_zero_non_value_bits to exclude these bits to implement this C++20 feature.
// The EDG front-end substitutes everything and runs into incomplete types passed to atomic<T>.
#if _HAS_CXX20 && !defined(__clang__) /* TRANSITION, LLVM-46685 */ && !defined(__EDG__)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm should we do this always even when not C++20? It seems a straight up correctness win in all modes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm also asking #1029 (comment) :-)

I agree on one hand, on the other, remember atomic constructor?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The atomic constructor situation is different because it had "static initialization order fiasco" effects not present here.

Copy link
Contributor Author

@AlexGuteniev AlexGuteniev Jul 18, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, but I can imagine other ways, like defining a type with = as memcpy and == as memcmp, thus effectively making its padding bits participate in value. I think old Standard explicitly specifies bitwise CAS instead of leaving out this case undefined.

(Sure that's not a normal use case problem, but neither is this whole feature)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(if the decision is made towards exposing in pre-C++20, be sure to replace constexpr with _CONSTEXPR_IF)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Sure that's not a normal use case problem, but neither is this whole feature)

Indeed.

I'm OK with leaving it '20 only.

#define _CMPXCHG_MASK_OUT_PADDING_BITS 1
#else
#define _CMPXCHG_MASK_OUT_PADDING_BITS 0
#endif

_STD_BEGIN
// STRUCT TEMPLATE _Storage_for
#if _CMPXCHG_MASK_OUT_PADDING_BITS
// Tag type used to select the _Storage_for constructor that builds a padding-bit mask
// (all value bits set, all padding bits cleared); see _Storage_for(_Form_mask_t) below.
struct _Form_mask_t {};
_INLINE_VAR constexpr _Form_mask_t _Form_mask{};
#endif // _CMPXCHG_MASK_OUT_PADDING_BITS

template <class _Ty>
struct _Storage_for {
    // uninitialized space to store a _Ty; allows materializing a _Ty bit pattern
    // without running _Ty's default constructor
    alignas(_Ty) unsigned char _Storage[sizeof(_Ty)];

    _Storage_for() = default;
    // non-copyable: this is raw storage, copying the bytes of a not-yet-constructed
    // _Ty would be meaningless
    _Storage_for(const _Storage_for&) = delete;
    _Storage_for& operator=(const _Storage_for&) = delete;

#if _CMPXCHG_MASK_OUT_PADDING_BITS
    // Forms a mask with 1s in every value bit and 0s in every padding bit of _Ty:
    // first saturate all bytes with 0xff, then have the compiler clear the
    // non-value (padding) bits. Used by compare_exchange to ignore padding.
    explicit _Storage_for(_Form_mask_t) noexcept {
        _CSTD memset(_Storage, 0xff, sizeof(_Ty));
        __builtin_zero_non_value_bits(_Ptr());
    }
#endif // _CMPXCHG_MASK_OUT_PADDING_BITS

    // View the storage as a _Ty (caller must have placed a valid _Ty bit pattern here).
    _NODISCARD _Ty& _Ref() noexcept {
        return reinterpret_cast<_Ty&>(_Storage);
    }

    // Pointer to the storage, typed as _Ty*.
    _NODISCARD _Ty* _Ptr() noexcept {
        return reinterpret_cast<_Ty*>(&_Storage);
    }
};

#if _CMPXCHG_MASK_OUT_PADDING_BITS
// true when objects of type _Ty may contain bits that do not participate in their value
// (padding); types whose object representation is unique cannot, and floating-point
// types are deliberately excluded by this trait even when their representations
// are not unique
template <class _Ty>
inline constexpr bool _Might_have_non_value_bits =
    !(has_unique_object_representations_v<_Ty> || is_floating_point_v<_Ty>);
#endif // _CMPXCHG_MASK_OUT_PADDING_BITS

// FENCES
extern "C" inline void atomic_thread_fence(const memory_order _Order) noexcept {
Expand Down Expand Up @@ -355,13 +400,27 @@ struct _Atomic_storage {
const auto _Storage_ptr = _STD addressof(_Storage);
const auto _Expected_ptr = _STD addressof(_Expected);
bool _Result;
#if _CMPXCHG_MASK_OUT_PADDING_BITS
__builtin_zero_non_value_bits(_Expected_ptr);
#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
_Lock();
if (_CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) == 0) {
#if _CMPXCHG_MASK_OUT_PADDING_BITS
if constexpr (_Might_have_non_value_bits<_Ty>) {
_Storage_for<_Ty> _Local;
const auto _Local_ptr = _Local._Ptr();
_CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_Ty));
__builtin_zero_non_value_bits(_Local_ptr);
_Result = _CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_Ty)) == 0;
} else {
_Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) == 0;
}
#else // _CMPXCHG_MASK_OUT_PADDING_BITS
_Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) == 0;
#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
if (_Result) {
_CSTD memcpy(_Storage_ptr, _STD addressof(_Desired), sizeof(_Ty));
_Result = true;
} else {
_CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_Ty));
_Result = false;
}

_Unlock();
Expand Down Expand Up @@ -480,8 +539,29 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics

bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired,
const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order
const char _Expected_bytes = _Atomic_reinterpret_as<char>(_Expected); // read before atomic operation
char _Expected_bytes = _Atomic_reinterpret_as<char>(_Expected); // read before atomic operation
char _Prev_bytes;

#if _CMPXCHG_MASK_OUT_PADDING_BITS
if constexpr (_Might_have_non_value_bits<_Ty>) {
_Storage_for<_Ty> _Mask{_Form_mask};
const char _Mask_val = _Atomic_reinterpret_as<char>(_Mask._Ref());

for (;;) {
_ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange8,
_Atomic_address_as<char>(_Storage), _Atomic_reinterpret_as<char>(_Desired), _Expected_bytes);
if (_Prev_bytes == _Expected_bytes) {
return true;
}

if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) {
reinterpret_cast<char&>(_Expected) = _Prev_bytes;
return false;
}
_Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val);
}
}
#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
_ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange8, _Atomic_address_as<char>(_Storage),
_Atomic_reinterpret_as<char>(_Desired), _Expected_bytes);
if (_Prev_bytes == _Expected_bytes) {
Expand Down Expand Up @@ -562,8 +642,28 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics

bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired,
const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order
const short _Expected_bytes = _Atomic_reinterpret_as<short>(_Expected); // read before atomic operation
short _Expected_bytes = _Atomic_reinterpret_as<short>(_Expected); // read before atomic operation
short _Prev_bytes;
#if _CMPXCHG_MASK_OUT_PADDING_BITS
if constexpr (_Might_have_non_value_bits<_Ty>) {
_Storage_for<_Ty> _Mask{_Form_mask};
const short _Mask_val = _Atomic_reinterpret_as<short>(_Mask._Ref());

for (;;) {
_ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange16,
_Atomic_address_as<short>(_Storage), _Atomic_reinterpret_as<short>(_Desired), _Expected_bytes);
if (_Prev_bytes == _Expected_bytes) {
return true;
}

if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) {
_CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty));
return false;
}
_Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val);
}
}
#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
_ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange16,
_Atomic_address_as<short>(_Storage), _Atomic_reinterpret_as<short>(_Desired), _Expected_bytes);
if (_Prev_bytes == _Expected_bytes) {
Expand Down Expand Up @@ -642,8 +742,28 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics

bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired,
const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order
const long _Expected_bytes = _Atomic_reinterpret_as<long>(_Expected); // read before atomic operation
long _Expected_bytes = _Atomic_reinterpret_as<long>(_Expected); // read before atomic operation
long _Prev_bytes;
#if _CMPXCHG_MASK_OUT_PADDING_BITS
if constexpr (_Might_have_non_value_bits<_Ty>) {
_Storage_for<_Ty> _Mask{_Form_mask};
const long _Mask_val = _Atomic_reinterpret_as<long>(_Mask);

for (;;) {
_ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange,
_Atomic_address_as<long>(_Storage), _Atomic_reinterpret_as<long>(_Desired), _Expected_bytes);
if (_Prev_bytes == _Expected_bytes) {
return true;
}

if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) {
_CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty));
return false;
}
_Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val);
}
}
#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
_ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange, _Atomic_address_as<long>(_Storage),
_Atomic_reinterpret_as<long>(_Desired), _Expected_bytes);
if (_Prev_bytes == _Expected_bytes) {
Expand Down Expand Up @@ -749,8 +869,30 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics

bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired,
const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order
const long long _Expected_bytes = _Atomic_reinterpret_as<long long>(_Expected); // read before atomic operation
long long _Expected_bytes = _Atomic_reinterpret_as<long long>(_Expected); // read before atomic operation
long long _Prev_bytes;

#if _CMPXCHG_MASK_OUT_PADDING_BITS
if constexpr (_Might_have_non_value_bits<_Ty>) {
_Storage_for<_Ty> _Mask{_Form_mask};
const long long _Mask_val = _Atomic_reinterpret_as<long long>(_Mask);

for (;;) {
_ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange64,
_Atomic_address_as<long long>(_Storage), _Atomic_reinterpret_as<long long>(_Desired),
_Expected_bytes);
if (_Prev_bytes == _Expected_bytes) {
return true;
}

if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) {
_CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty));
return false;
}
_Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val);
}
}
#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
_ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange64,
_Atomic_address_as<long long>(_Storage), _Atomic_reinterpret_as<long long>(_Desired), _Expected_bytes);
if (_Prev_bytes == _Expected_bytes) {
Expand Down Expand Up @@ -2103,6 +2245,8 @@ inline void atomic_flag_clear_explicit(volatile atomic_flag* _Flag, memory_order

_STD_END

#undef _CMPXCHG_MASK_OUT_PADDING_BITS

#undef _ATOMIC_CHOOSE_INTRINSIC
#undef _ATOMIC_HAS_DCAS

Expand Down
14 changes: 0 additions & 14 deletions stl/inc/execution
Original file line number Diff line number Diff line change
Expand Up @@ -3590,20 +3590,6 @@ _FwdIt partition(_ExPo&&, _FwdIt _First, const _FwdIt _Last, _Pr _Pred) noexcept
}

// PARALLEL FUNCTION TEMPLATE set_intersection
// Raw, suitably-aligned buffer big enough for one _Ty; lets the owner construct
// the _Ty in place later instead of default-constructing it up front.
template <class _Ty>
struct _Storage_for {
    alignas(_Ty) unsigned char _Storage[sizeof(_Ty)]; // uninitialized bytes for one _Ty

    _Storage_for() = default;
    // non-copyable: duplicating raw object bytes is meaningless for non-trivial _Ty
    _Storage_for(const _Storage_for&) = delete;
    _Storage_for& operator=(const _Storage_for&) = delete;

    _Ty& _Ref() {
        // reinterpret the buffer as the _Ty that was constructed into it
        return *reinterpret_cast<_Ty*>(&_Storage);
    }
};

inline constexpr unsigned char _Local_available = 1;
inline constexpr unsigned char _Sum_available = 2;

Expand Down
1 change: 1 addition & 0 deletions tests/std/test.lst
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ tests\P0433R2_deduction_guides
tests\P0476R2_bit_cast
tests\P0487R1_fixing_operator_shl_basic_istream_char_pointer
tests\P0513R0_poisoning_the_hash
tests\P0528R3_cmpxchg_pad
tests\P0553R4_bit_rotating_and_counting_functions
tests\P0556R3_bit_integral_power_of_two_operations
tests\P0586R2_integer_comparison
Expand Down
4 changes: 4 additions & 0 deletions tests/std/tests/P0528R3_cmpxchg_pad/env.lst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

RUNALL_INCLUDE ..\usual_latest_matrix.lst
Loading