diff --git a/stl/inc/atomic b/stl/inc/atomic
index 8881fae37a8..80377e2c722 100644
--- a/stl/inc/atomic
+++ b/stl/inc/atomic
@@ -115,7 +115,52 @@ _NODISCARD extern "C" bool __cdecl __std_atomic_has_cmpxchg16b() noexcept;
 #define ATOMIC_LLONG_LOCK_FREE 2
 #define ATOMIC_POINTER_LOCK_FREE 2
 
+// Padding bits should not participate in cmpxchg comparison starting in C++20.
+// Clang does not have __builtin_zero_non_value_bits, which is needed to exclude these bits for this C++20 feature.
+// The EDG front-end substitutes everything and runs into incomplete types passed to atomic.
+#if _HAS_CXX20 && !defined(__clang__) /* TRANSITION, LLVM-46685 */ && !defined(__EDG__)
+#define _CMPXCHG_MASK_OUT_PADDING_BITS 1
+#else
+#define _CMPXCHG_MASK_OUT_PADDING_BITS 0
+#endif
+
 _STD_BEGIN
+// STRUCT TEMPLATE _Storage_for
+#if _CMPXCHG_MASK_OUT_PADDING_BITS
+struct _Form_mask_t {};
+_INLINE_VAR constexpr _Form_mask_t _Form_mask{};
+#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
+
+template <class _Ty>
+struct _Storage_for {
+    // uninitialized space to store a _Ty
+    alignas(_Ty) unsigned char _Storage[sizeof(_Ty)];
+
+    _Storage_for() = default;
+    _Storage_for(const _Storage_for&) = delete;
+    _Storage_for& operator=(const _Storage_for&) = delete;
+
+#if _CMPXCHG_MASK_OUT_PADDING_BITS
+    explicit _Storage_for(_Form_mask_t) noexcept {
+        _CSTD memset(_Storage, 0xff, sizeof(_Ty));
+        __builtin_zero_non_value_bits(_Ptr());
+    }
+#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
+
+    _NODISCARD _Ty& _Ref() noexcept {
+        return reinterpret_cast<_Ty&>(_Storage);
+    }
+
+    _NODISCARD _Ty* _Ptr() noexcept {
+        return reinterpret_cast<_Ty*>(&_Storage);
+    }
+};
+
+#if _CMPXCHG_MASK_OUT_PADDING_BITS
+template <class _Ty>
+inline constexpr bool _Might_have_non_value_bits =
+    !has_unique_object_representations_v<_Ty> && !is_floating_point_v<_Ty>;
+#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
 
 // FENCES
 extern "C" inline void atomic_thread_fence(const memory_order _Order) noexcept {
@@ -355,13 +400,27 @@ struct _Atomic_storage {
         const auto _Storage_ptr = _STD addressof(_Storage);
         const auto _Expected_ptr = _STD addressof(_Expected);
         bool _Result;
+#if _CMPXCHG_MASK_OUT_PADDING_BITS
+        __builtin_zero_non_value_bits(_Expected_ptr);
+#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
         _Lock();
-        if (_CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) == 0) {
+#if _CMPXCHG_MASK_OUT_PADDING_BITS
+        if constexpr (_Might_have_non_value_bits<_Ty>) {
+            _Storage_for<_Ty> _Local;
+            const auto _Local_ptr = _Local._Ptr();
+            _CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_Ty));
+            __builtin_zero_non_value_bits(_Local_ptr);
+            _Result = _CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_Ty)) == 0;
+        } else {
+            _Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) == 0;
+        }
+#else // _CMPXCHG_MASK_OUT_PADDING_BITS
+        _Result = _CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) == 0;
+#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
+        if (_Result) {
             _CSTD memcpy(_Storage_ptr, _STD addressof(_Desired), sizeof(_Ty));
-            _Result = true;
         } else {
             _CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_Ty));
-            _Result = false;
         }
 
         _Unlock();
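The _Storage_for helper and the locked fallback above rely on the __builtin_zero_non_value_bits intrinsic, which clears every bit of an object that does not take part in its value representation (padding between members, trailing padding, unused bit-field bits). A minimal standalone sketch of the comparison the fallback performs, using a hand-written stand-in for the intrinsic that is only valid for one hypothetical struct (the header itself uses the intrinsic, not a function like this):

#include <cstring>

struct Padded { // 1 value byte, then 3 padding bytes before i on typical ABIs
    char c;
    int i;
};

// Hand-written stand-in for __builtin_zero_non_value_bits, valid only for Padded.
void zero_padding(Padded* p) {
    Padded tmp;
    std::memset(&tmp, 0, sizeof(Padded)); // clear everything, including padding
    tmp.c = p->c;                         // copy the value bits back member by member
    tmp.i = p->i;
    std::memcpy(p, &tmp, sizeof(Padded)); // rewrite *p with its padding zeroed
}

// Compare only value bits, the way the locked fallback does.
bool equal_values(const Padded& lhs, const Padded& rhs) {
    Padded a = lhs;
    Padded b = rhs;
    zero_padding(&a);
    zero_padding(&b);
    return std::memcmp(&a, &b, sizeof(Padded)) == 0;
}

int main() {
    Padded a, b;
    std::memset(&a, 0xAA, sizeof a); // different garbage in the padding bytes
    std::memset(&b, 0x55, sizeof b);
    a.c = 1;
    a.i = 2;
    b.c = 1;
    b.i = 2;
    return equal_values(a, b) ? 0 : 1; // equal by value despite different padding
}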
@@ -480,8 +539,29 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics
     bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired,
         const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order
-        const char _Expected_bytes = _Atomic_reinterpret_as<char>(_Expected); // read before atomic operation
+        char _Expected_bytes = _Atomic_reinterpret_as<char>(_Expected); // read before atomic operation
         char _Prev_bytes;
+
+#if _CMPXCHG_MASK_OUT_PADDING_BITS
+        if constexpr (_Might_have_non_value_bits<_Ty>) {
+            _Storage_for<_Ty> _Mask{_Form_mask};
+            const char _Mask_val = _Atomic_reinterpret_as<char>(_Mask._Ref());
+
+            for (;;) {
+                _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange8,
+                    _Atomic_address_as<char>(_Storage), _Atomic_reinterpret_as<char>(_Desired), _Expected_bytes);
+                if (_Prev_bytes == _Expected_bytes) {
+                    return true;
+                }
+
+                if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) {
+                    reinterpret_cast<char&>(_Expected) = _Prev_bytes;
+                    return false;
+                }
+                _Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val);
+            }
+        }
+#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
         _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange8,
             _Atomic_address_as<char>(_Storage), _Atomic_reinterpret_as<char>(_Desired), _Expected_bytes);
         if (_Prev_bytes == _Expected_bytes) {
@@ -562,8 +642,28 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics
     bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired,
         const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order
-        const short _Expected_bytes = _Atomic_reinterpret_as<short>(_Expected); // read before atomic operation
+        short _Expected_bytes = _Atomic_reinterpret_as<short>(_Expected); // read before atomic operation
         short _Prev_bytes;
+#if _CMPXCHG_MASK_OUT_PADDING_BITS
+        if constexpr (_Might_have_non_value_bits<_Ty>) {
+            _Storage_for<_Ty> _Mask{_Form_mask};
+            const short _Mask_val = _Atomic_reinterpret_as<short>(_Mask._Ref());
+
+            for (;;) {
+                _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange16,
+                    _Atomic_address_as<short>(_Storage), _Atomic_reinterpret_as<short>(_Desired), _Expected_bytes);
+                if (_Prev_bytes == _Expected_bytes) {
+                    return true;
+                }
+
+                if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) {
+                    _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty));
+                    return false;
+                }
+                _Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val);
+            }
+        }
+#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
         _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange16,
             _Atomic_address_as<short>(_Storage), _Atomic_reinterpret_as<short>(_Desired), _Expected_bytes);
         if (_Prev_bytes == _Expected_bytes) {
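In the lock-free specializations, the mask produced by _Storage_for<_Ty>{_Form_mask} has ones exactly at the value-bit positions, so (_Prev_bytes ^ _Expected_bytes) & _Mask_val detects a genuine value mismatch, and (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val) rewrites the expected pattern with the padding bits currently in memory before retrying. A self-contained sketch of that arithmetic on plain bytes, with an assumed mask of 0x3F and illustrative values that are not taken from the header:

#include <cassert>

int main() {
    // Suppose the low 6 bits are value bits and the top 2 bits are padding.
    const unsigned char mask = 0x3F;

    unsigned char expected = 0x05; // value 5, padding bits clear
    unsigned char prev     = 0xC5; // same value 5, but padding bits set

    // The hardware CAS failed because the full bytes differ...
    assert(prev != expected);
    // ...but the difference is confined to padding, so it is not a real mismatch:
    assert(((prev ^ expected) & mask) == 0);

    // Refresh expected: keep its value bits, adopt the padding bits actually in
    // memory, then retry the CAS with a byte pattern that can succeed.
    expected = static_cast<unsigned char>((expected & mask) | (prev & ~mask));
    assert(expected == 0xC5);

    // Had the value bits differed, (prev ^ expected) & mask would be nonzero and
    // compare_exchange would report failure with expected updated to prev.
    return 0;
}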
@@ -642,8 +742,28 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics
     bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired,
         const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order
-        const long _Expected_bytes = _Atomic_reinterpret_as<long>(_Expected); // read before atomic operation
+        long _Expected_bytes = _Atomic_reinterpret_as<long>(_Expected); // read before atomic operation
         long _Prev_bytes;
+#if _CMPXCHG_MASK_OUT_PADDING_BITS
+        if constexpr (_Might_have_non_value_bits<_Ty>) {
+            _Storage_for<_Ty> _Mask{_Form_mask};
+            const long _Mask_val = _Atomic_reinterpret_as<long>(_Mask);
+
+            for (;;) {
+                _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange,
+                    _Atomic_address_as<long>(_Storage), _Atomic_reinterpret_as<long>(_Desired), _Expected_bytes);
+                if (_Prev_bytes == _Expected_bytes) {
+                    return true;
+                }
+
+                if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) {
+                    _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty));
+                    return false;
+                }
+                _Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val);
+            }
+        }
+#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
         _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange,
             _Atomic_address_as<long>(_Storage), _Atomic_reinterpret_as<long>(_Desired), _Expected_bytes);
         if (_Prev_bytes == _Expected_bytes) {
@@ -749,8 +869,30 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics
     bool compare_exchange_strong(_Ty& _Expected, const _Ty _Desired,
         const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with given memory order
-        const long long _Expected_bytes = _Atomic_reinterpret_as<long long>(_Expected); // read before atomic operation
+        long long _Expected_bytes = _Atomic_reinterpret_as<long long>(_Expected); // read before atomic operation
         long long _Prev_bytes;
+
+#if _CMPXCHG_MASK_OUT_PADDING_BITS
+        if constexpr (_Might_have_non_value_bits<_Ty>) {
+            _Storage_for<_Ty> _Mask{_Form_mask};
+            const long long _Mask_val = _Atomic_reinterpret_as<long long>(_Mask);
+
+            for (;;) {
+                _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange64,
+                    _Atomic_address_as<long long>(_Storage), _Atomic_reinterpret_as<long long>(_Desired),
+                    _Expected_bytes);
+                if (_Prev_bytes == _Expected_bytes) {
+                    return true;
+                }
+
+                if ((_Prev_bytes ^ _Expected_bytes) & _Mask_val) {
+                    _CSTD memcpy(_STD addressof(_Expected), &_Prev_bytes, sizeof(_Ty));
+                    return false;
+                }
+                _Expected_bytes = (_Expected_bytes & _Mask_val) | (_Prev_bytes & ~_Mask_val);
+            }
+        }
+#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
         _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange64,
             _Atomic_address_as<long long>(_Storage), _Atomic_reinterpret_as<long long>(_Desired), _Expected_bytes);
         if (_Prev_bytes == _Expected_bytes) {
@@ -2103,6 +2245,8 @@ inline void atomic_flag_clear_explicit(volatile atomic_flag* _Flag, memory_order
 
 _STD_END
 
+#undef _CMPXCHG_MASK_OUT_PADDING_BITS
+
 #undef _ATOMIC_CHOOSE_INTRINSIC
 #undef _ATOMIC_HAS_DCAS
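The masking loop is only compiled for types that can actually carry non-value bits; _Might_have_non_value_bits excludes types with unique object representations and floating-point types. A small compile-time sketch of how that condition classifies a few types, assuming typical MSVC struct layout for the hypothetical types below (this mirrors, rather than reuses, the header's trait):

#include <type_traits>

struct Tight { // no padding: 8 value bytes
    int a;
    int b;
};

struct Padded { // 3 padding bytes after c on typical ABIs
    char c;
    int i;
};

// Mirror of the header's condition; a sketch, not the library's own definition.
template <class T>
constexpr bool might_have_non_value_bits =
    !std::has_unique_object_representations_v<T> && !std::is_floating_point_v<T>;

static_assert(!might_have_non_value_bits<int>);    // unique representation: compare bytes directly
static_assert(!might_have_non_value_bits<float>);  // floating-point: excluded from the masking path
static_assert(!might_have_non_value_bits<Tight>);  // no padding: compare bytes directly
static_assert(might_have_non_value_bits<Padded>);  // padding present: take the masking path

int main() {} // nothing to run; the checks above are compile-time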
diff --git a/stl/inc/execution b/stl/inc/execution
index d632e4c4788..18ccbc1d496 100644
--- a/stl/inc/execution
+++ b/stl/inc/execution
@@ -3590,20 +3590,6 @@ _FwdIt partition(_ExPo&&, _FwdIt _First, const _FwdIt _Last, _Pr _Pred) noexcept
 }
 
 // PARALLEL FUNCTION TEMPLATE set_intersection
-template <class _Ty>
-struct _Storage_for {
-    // uninitialized space to store a _Ty
-    alignas(_Ty) unsigned char _Storage[sizeof(_Ty)];
-
-    _Storage_for() = default;
-    _Storage_for(const _Storage_for&) = delete;
-    _Storage_for& operator=(const _Storage_for&) = delete;
-
-    _Ty& _Ref() {
-        return reinterpret_cast<_Ty&>(_Storage);
-    }
-};
-
 inline constexpr unsigned char _Local_available = 1;
 inline constexpr unsigned char _Sum_available = 2;
diff --git a/tests/std/test.lst b/tests/std/test.lst
index 56b6f4f7191..995c701db60 100644
--- a/tests/std/test.lst
+++ b/tests/std/test.lst
@@ -220,6 +220,7 @@ tests\P0433R2_deduction_guides
 tests\P0476R2_bit_cast
 tests\P0487R1_fixing_operator_shl_basic_istream_char_pointer
 tests\P0513R0_poisoning_the_hash
+tests\P0528R3_cmpxchg_pad
 tests\P0553R4_bit_rotating_and_counting_functions
 tests\P0556R3_bit_integral_power_of_two_operations
 tests\P0586R2_integer_comparison
diff --git a/tests/std/tests/P0528R3_cmpxchg_pad/env.lst b/tests/std/tests/P0528R3_cmpxchg_pad/env.lst
new file mode 100644
index 00000000000..642f530ffad
--- /dev/null
+++ b/tests/std/tests/P0528R3_cmpxchg_pad/env.lst
@@ -0,0 +1,4 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE ..\usual_latest_matrix.lst
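The new test below fills the padding of otherwise equal objects with different garbage and checks that compare_exchange still treats them as equal. A distilled, self-contained version of that user-visible property, with a hypothetical struct and values that are not taken from the test:

#include <atomic>
#include <cassert>
#include <cstring>

struct Padded {
    char c; // value byte; 3 padding bytes follow it on typical ABIs
    int i;
};

int main() {
    Padded stored, expected;
    std::memset(&stored, 0xAA, sizeof stored);   // garbage padding pattern #1
    std::memset(&expected, 0x55, sizeof expected); // garbage padding pattern #2
    stored.c = 1;
    stored.i = 2;
    expected.c = 1; // same values as stored...
    expected.i = 2; // ...but different padding bytes

    Padded desired{};
    desired.c = 3;
    desired.i = 4;

    std::atomic<Padded> a{stored};

    // In C++20 the comparison is value-based, so differing padding must not cause
    // a spurious failure here; this is the behavior P0528R3 requires.
    assert(a.compare_exchange_strong(expected, desired));
    assert(a.load().c == 3);
    return 0;
}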
diff --git a/tests/std/tests/P0528R3_cmpxchg_pad/test.cpp b/tests/std/tests/P0528R3_cmpxchg_pad/test.cpp
new file mode 100644
index 00000000000..44419f57410
--- /dev/null
+++ b/tests/std/tests/P0528R3_cmpxchg_pad/test.cpp
@@ -0,0 +1,254 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <atomic>
+#include <cassert>
+#include <cstring>
+#include <memory>
+#include <type_traits>
+
+struct X0 {
+    void operator&() const = delete;
+};
+
+
+struct X1 {
+    char x : 6;
+
+    void operator&() const = delete;
+
+    void set(const char v) {
+        x = v;
+    }
+
+    bool check(const char v) const {
+        return x == v;
+    }
+};
+
+struct X2 {
+    short x : 9;
+
+    void operator&() const = delete;
+
+    void set(const char v) {
+        x = v;
+    }
+
+    bool check(const char v) const {
+        return x == v;
+    }
+};
+
+#pragma pack(push, 1)
+struct X3 {
+    char x : 4;
+    char : 2;
+    char y : 1;
+    short z;
+
+    void operator&() const = delete;
+
+    void set(const char v) {
+        x = v;
+        y = 0;
+        z = ~v;
+    }
+
+    bool check(const char v) const {
+        return x == v && z == ~v;
+    }
+};
+#pragma pack(pop)
+
+#pragma warning(push)
+#pragma warning(disable : 4324) // '%s': structure was padded due to alignment specifier
+struct alignas(4) X4 {
+    char x;
+
+    void operator&() const = delete;
+
+    void set(const char v) {
+        x = v;
+    }
+
+    bool check(const char v) const {
+        return x == v;
+    }
+};
+#pragma warning(pop)
+
+#pragma warning(push)
+#pragma warning(disable : 4324) // '%s': structure was padded due to alignment specifier
+struct X6 {
+    char x;
+    alignas(2) char y[2];
+    char z;
+
+    void operator&() const = delete;
+
+    void set(const char v) {
+        x = v;
+        std::memset(&y, 0, sizeof(y));
+        z = ~v;
+    }
+
+    bool check(const char v) const {
+        return x == v && z == ~v;
+    }
+};
+#pragma warning(pop)
+
+struct X8 {
+    char x;
+    long y;
+
+    void operator&() const = delete;
+
+    void set(const char v) {
+        x = v;
+        y = 0;
+    }
+
+    bool check(const char v) const {
+        return x == v;
+    }
+};
+
+#pragma pack(push, 1)
+struct X9 {
+    X8 x;
+    char z;
+
+    void operator&() const = delete;
+
+    void set(const char v) {
+        x.set(v);
+        z = ~v;
+    }
+
+    bool check(const char v) const {
+        return x.check(v) && z == ~v;
+    }
+};
+#pragma pack(pop)
+
+struct X16 {
+    long x;
+    char y;
+    long long z;
+
+    void operator&() const = delete;
+
+    void set(const char v) {
+        x = v;
+        y = 0;
+        z = ~v;
+    }
+
+    bool check(const char v) const {
+        return x == v && z == ~v;
+    }
+};
+
+struct X20 {
+    long x;
+    long y[3];
+    char z;
+
+    void operator&() const = delete;
+
+    void set(const char v) {
+        x = v;
+        std::memset(&y, 0, sizeof(y));
+        z = ~v;
+    }
+
+    bool check(const char v) const {
+        return x == v && z == ~v;
+    }
+};
+
+
+template <class X, std::size_t S>
+void test() {
+    static_assert(sizeof(X) == S, "Unexpected size");
+    static_assert(
+        !std::has_unique_object_representations_v<X>, "Type without padding is not useful for testing P0528.");
+    X x1;
+    X x2;
+    X x3;
+    X x4;
+    std::memset(std::addressof(x1), 0xaa, sizeof(x1));
+    std::memset(std::addressof(x2), 0x55, sizeof(x2));
+    std::memset(std::addressof(x3), 0x55, sizeof(x3));
+    std::memset(std::addressof(x4), 0x55, sizeof(x4));
+    x1.set(5);
+    x2.set(5);
+    x3.set(6);
+    x4.set(7);
+
+    std::atomic<X> v;
+    v.store(x1);
+    X x;
+    std::memcpy(std::addressof(x), std::addressof(x3), sizeof(x));
+    assert(!v.compare_exchange_strong(x, x4));
+    assert(v.load().check(5));
+
+    v.store(x1);
+    for (int retry = 0; retry != 10; ++retry) {
+        X xw;
+        std::memcpy(std::addressof(xw), std::addressof(x3), sizeof(x));
+        assert(!v.compare_exchange_weak(xw, x4));
+        assert(v.load().check(5));
+    }
+
+    v.store(x1);
+    std::memcpy(std::addressof(x), std::addressof(x2), sizeof(x));
+    assert(v.compare_exchange_strong(x, x3));
+    assert(v.load().check(6));
+
+    v.store(x1);
+    for (;;) {
+        X xw;
+        std::memcpy(std::addressof(xw), std::addressof(x2), sizeof(x));
+        if (v.compare_exchange_weak(xw, x3)) {
+            break;
+        }
+    }
+    assert(v.load().check(6));
+}
+
+
+template <class X>
+void test0() {
+    X x1;
+    X x2;
+    X x3;
+    std::memset(std::addressof(x1), 0xaa, sizeof(x1));
+    std::memset(std::addressof(x2), 0x55, sizeof(x2));
+    std::memset(std::addressof(x3), 0x55, sizeof(x3));
+
+    std::atomic<X> v;
+    v.store(x1);
+    X x;
+    std::memcpy(std::addressof(x), std::addressof(x3), sizeof(x));
+
+    assert(v.compare_exchange_strong(x, x2));
+}
+
+int main() {
+#ifndef __clang__ // TRANSITION, LLVM-46685
+    test0<X0>();
+    test<X1, 1>();
+    test<X2, 2>();
+    test<X3, 3>();
+    test<X4, 4>();
+    test<X6, 6>();
+    test<X8, 8>();
+    test<X9, 9>();
+    test<X16, 16>();
+    test<X20, 20>();
+#endif // !__clang__, TRANSITION, LLVM-46685
+    return 0;
+}
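The last loop in test() retries compare_exchange_weak because the weak form may fail spuriously even when the comparison would succeed. For reference, the same retry pattern in its usual generic form, here incrementing a counter (illustrative only, not part of the test):

#include <atomic>

// Canonical compare_exchange_weak retry loop: on failure, 'expected' is reloaded
// with the current value, so the loop simply tries again until the exchange lands.
int increment(std::atomic<int>& counter) {
    int expected = counter.load();
    while (!counter.compare_exchange_weak(expected, expected + 1)) {
        // expected now holds the value actually seen in memory; retry
    }
    return expected + 1;
}

int main() {
    std::atomic<int> c{0};
    increment(c);
    return c.load() == 1 ? 0 : 1;
}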