From e4b75dcd447ec7fdd640f8b28eb73ffe096c3832 Mon Sep 17 00:00:00 2001
From: Alex Guteniev
Date: Sun, 2 Aug 2020 03:54:07 +0300
Subject: [PATCH] Implement std::atomic::wait (#593)

Co-authored-by: Curtis J Bezault
Co-authored-by: Stephan T. Lavavej
Co-authored-by: Billy Robert O'Neal III
Co-authored-by: Casey Carter
---
 CMakeLists.txt                                  |   8 +-
 stl/CMakeLists.txt                              |  33 +-
 stl/inc/atomic                                  | 359 ++++++++++++++++--
 stl/inc/execution                               |  74 ++--
 stl/inc/memory                                  |  47 ++-
 stl/inc/xatomic_wait.h                          |  72 ++++
 stl/inc/yvals.h                                 |  16 +
 stl/inc/yvals_core.h                            |  16 +
 stl/msbuild/stl_atomic_wait/dirs.proj           |  15 +
 stl/msbuild/stl_atomic_wait/md/dirs.proj        |  17 +
 .../msvcp_atomic_wait.nativeproj                |  15 +
 .../msvcp_atomic_wait.nativeproj                |  15 +
 .../msvcp_atomic_wait.nativeproj                |  15 +
 .../msvcp_atomic_wait.nativeproj                |  15 +
 .../msvcp_atomic_wait.settings.targets          |  90 +++++
 .../stl_atomic_wait/msvcprt_atomic_wait.rc      |  23 ++
 .../stl_atomic_wait.files.settings.targets      |  15 +
 stl/msbuild/stl_atomic_wait/xmd/dirs.proj       |  24 ++
 .../msvcp_atomic_wait.nativeproj                |  15 +
 .../msvcp_atomic_wait.nativeproj                |  15 +
 .../msvcp_atomic_wait.nativeproj                |  15 +
 .../msvcp_atomic_wait.nativeproj                |  15 +
 .../stl_base/stl.files.settings.targets         |   5 +-
 .../stl_post/msvcp_post.settings.targets        |   3 +
 stl/src/atomic_wait.cpp                         | 334 ++++++++++++++++
 stl/src/msvcp_atomic_wait.src                   |  25 ++
 stl/src/parallel_algorithms.cpp                 | 257 +------------
 tests/libcxx/expected_results.txt               |   1 -
 tests/libcxx/skipped_tests.txt                  |   1 -
 tests/std/include/test_atomic_wait.hpp          | 203 ++++++++++
 tests/std/test.lst                              |   2 +
 tests/std/tests/P1135R6_atomic_wait/env.lst     |   4 +
 tests/std/tests/P1135R6_atomic_wait/test.cpp    |  10 +
 .../tests/P1135R6_atomic_wait_vista/env.lst     |   4 +
 .../tests/P1135R6_atomic_wait_vista/test.cpp    |   9 +
 .../VSO_0157762_feature_test_macros/test.cpp    |  14 +
 36 files changed, 1467 insertions(+), 334 deletions(-)
 create mode 100644 stl/inc/xatomic_wait.h
 create mode 100644 stl/msbuild/stl_atomic_wait/dirs.proj
 create mode 100644 stl/msbuild/stl_atomic_wait/md/dirs.proj
 create mode 100644 stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_app/msvcp_atomic_wait.nativeproj
 create mode 100644 stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_kernel32/msvcp_atomic_wait.nativeproj
 create mode 100644 stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_netfx/msvcp_atomic_wait.nativeproj
 create mode 100644 stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_onecore/msvcp_atomic_wait.nativeproj
 create mode 100644 stl/msbuild/stl_atomic_wait/msvcp_atomic_wait.settings.targets
 create mode 100644 stl/msbuild/stl_atomic_wait/msvcprt_atomic_wait.rc
 create mode 100644 stl/msbuild/stl_atomic_wait/stl_atomic_wait.files.settings.targets
 create mode 100644 stl/msbuild/stl_atomic_wait/xmd/dirs.proj
 create mode 100644 stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_app/msvcp_atomic_wait.nativeproj
 create mode 100644 stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_kernel32/msvcp_atomic_wait.nativeproj
 create mode 100644 stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_netfx/msvcp_atomic_wait.nativeproj
 create mode 100644 stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_onecore/msvcp_atomic_wait.nativeproj
 create mode 100644 stl/src/atomic_wait.cpp
 create mode 100644 stl/src/msvcp_atomic_wait.src
 create mode 100644 tests/std/include/test_atomic_wait.hpp
 create mode 100644 tests/std/tests/P1135R6_atomic_wait/env.lst
 create mode 100644 tests/std/tests/P1135R6_atomic_wait/test.cpp
 create mode 100644 tests/std/tests/P1135R6_atomic_wait_vista/env.lst
 create mode 100644
tests/std/tests/P1135R6_atomic_wait_vista/test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ed6054821..bf5cf6d9dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,27 +32,27 @@ if("${VCLIBS_TARGET_ARCHITECTURE}" MATCHES "^[xX]86$") set(VCLIBS_X86_OR_X64 "x86") # Note that we set _WIN32_WINNT to a high level to make declarations available, but still engage downlevel # runtime dynamic linking by setting our own _STL_WIN32_WINNT back to Windows XP. - add_compile_definitions(_X86_ _VCRT_WIN32_WINNT=_WIN32_WINNT_WINXP _STL_WIN32_WINNT=_WIN32_WINNT_WINXP + add_compile_definitions(_X86_ _VCRT_WIN32_WINNT=0x0501 _STL_WIN32_WINNT=0x0501 _WIN32_WINNT=0x0602 NTDDI_VERSION=NTDDI_WIN8) add_compile_options(/arch:IA32) elseif(VCLIBS_TARGET_ARCHITECTURE MATCHES "^[xX]64$") set(VCLIBS_TARGET_ARCHITECTURE "x64") set(VCLIBS_I386_OR_AMD64 "amd64") set(VCLIBS_X86_OR_X64 "x64") - add_compile_definitions(_AMD64_ _VCRT_WIN32_WINNT=_WIN32_WINNT_WINXP _STL_WIN32_WINNT=_WIN32_WINNT_WINXP + add_compile_definitions(_AMD64_ _VCRT_WIN32_WINNT=0x0501 _STL_WIN32_WINNT=0x0501 _WIN32_WINNT=0x0602 NTDDI_VERSION=NTDDI_WIN8) elseif(VCLIBS_TARGET_ARCHITECTURE MATCHES "^[aA][rR][mM][vV]7$") set(VCLIBS_TARGET_ARCHITECTURE "arm") set(VCLIBS_I386_OR_AMD64 "arm") set(VCLIBS_X86_OR_X64 "arm") - add_compile_definitions(_ARM_ _VCRT_WIN32_WINNT=_WIN32_WINNT_WIN8 _STL_WIN32_WINNT=_WIN32_WINNT_WIN8 + add_compile_definitions(_ARM_ _VCRT_WIN32_WINNT=0x0602 _STL_WIN32_WINNT=0x0602 _WIN32_WINNT=0x0602 NTDDI_VERSION=NTDDI_WIN8) string(APPEND CMAKE_CXX_STANDARD_LIBRARIES " Synchronization.lib") elseif(VCLIBS_TARGET_ARCHITECTURE MATCHES "^[aA][rR][mM]64$") set(VCLIBS_TARGET_ARCHITECTURE "arm64") set(VCLIBS_I386_OR_AMD64 "arm64") set(VCLIBS_X86_OR_X64 "arm64") - add_compile_definitions(_ARM64_ _VCRT_WIN32_WINNT=_WIN32_WINNT_WIN10 _STL_WIN32_WINNT=_WIN32_WINNT_WIN10 + add_compile_definitions(_ARM64_ _VCRT_WIN32_WINNT=0x0A00 _STL_WIN32_WINNT=0x0A00 _WIN32_WINNT=0x0A00 NTDDI_VERSION=NTDDI_WIN10) string(APPEND CMAKE_CXX_STANDARD_LIBRARIES " Synchronization.lib") else() diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt index 4d03fa0e18..23dc688099 100644 --- a/stl/CMakeLists.txt +++ b/stl/CMakeLists.txt @@ -196,6 +196,7 @@ set(HEADERS ${CMAKE_CURRENT_LIST_DIR}/inc/vector ${CMAKE_CURRENT_LIST_DIR}/inc/version ${CMAKE_CURRENT_LIST_DIR}/inc/xatomic.h + ${CMAKE_CURRENT_LIST_DIR}/inc/xatomic_wait.h ${CMAKE_CURRENT_LIST_DIR}/inc/xbit_ops.h ${CMAKE_CURRENT_LIST_DIR}/inc/xcall_once.h ${CMAKE_CURRENT_LIST_DIR}/inc/xcharconv.h @@ -241,7 +242,6 @@ set(IMPLIB_SOURCES ${CMAKE_CURRENT_LIST_DIR}/src/filesystem.cpp ${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp ${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp - ${CMAKE_CURRENT_LIST_DIR}/src/parallel_algorithms.cpp ${CMAKE_CURRENT_LIST_DIR}/src/sharedmutex.cpp ${CMAKE_CURRENT_LIST_DIR}/src/syserror_import_lib.cpp ${CMAKE_CURRENT_LIST_DIR}/src/vector_algorithms.cpp @@ -388,6 +388,11 @@ set(SOURCES_SATELLITE_2 ${CMAKE_CURRENT_LIST_DIR}/src/special_math.cpp ) +set(SOURCES_SATELLITE_ATOMIC_WAIT + ${CMAKE_CURRENT_LIST_DIR}/src/atomic_wait.cpp + ${CMAKE_CURRENT_LIST_DIR}/src/parallel_algorithms.cpp +) + set(SOURCES_SATELLITE_CODECVT_IDS ${CMAKE_CURRENT_LIST_DIR}/src/ulocale.cpp ) @@ -396,6 +401,7 @@ set(SOURCES_SATELLITE_CODECVT_IDS set(STATIC_SOURCES ${SOURCES_SATELLITE_1} ${SOURCES_SATELLITE_2} + ${SOURCES_SATELLITE_ATOMIC_WAIT} ${SOURCES_SATELLITE_CODECVT_IDS} ) @@ -459,6 +465,27 @@ function(add_stl_dlls D_SUFFIX THIS_CONFIG_DEFINITIONS THIS_CONFIG_COMPILE_OPTIO set_target_properties(msvcp_2${D_SUFFIX} 
PROPERTIES OUTPUT_NAME "msvcp140_2${D_SUFFIX}${VCLIBS_SUFFIX}") target_link_options(msvcp_2${D_SUFFIX} PRIVATE "${THIS_CONFIG_LINK_OPTIONS}") + # msvcp140_atomic_wait.dll (the atomic wait satellite) + add_library(msvcp${D_SUFFIX}_atomic_wait_objects OBJECT ${SOURCES_SATELLITE_ATOMIC_WAIT}) + target_compile_definitions(msvcp${D_SUFFIX}_atomic_wait_objects PRIVATE "_BUILDING_SATELLITE_ATOMIC_WAIT;_DLL;${THIS_CONFIG_DEFINITIONS}") + target_compile_options(msvcp${D_SUFFIX}_atomic_wait_objects PRIVATE "${THIS_CONFIG_COMPILE_OPTIONS};${GL_FLAG};/EHsc") + + # generate the .def for msvcp140_atomic_wait.dll + set(_ATOMIC_WAIT_OUTPUT_NAME "msvcp140${D_SUFFIX}_atomic_wait${VCLIBS_SUFFIX}") + string(TOUPPER "${_ATOMIC_WAIT_OUTPUT_NAME}" _ATOMIC_WAIT_OUTPUT_NAME_UPPER) + set(_ATOMIC_WAIT_DEF_NAME "${CMAKE_BINARY_DIR}/msvcp_atomic_wait${D_SUFFIX}.def") + set(_ATOMIC_WAIT_DEF_FILE_SRC "${CMAKE_CURRENT_LIST_DIR}/src/msvcp_atomic_wait.src") + set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${_ATOMIC_WAIT_DEF_FILE_SRC}") + file(READ "${_ATOMIC_WAIT_DEF_FILE_SRC}" _ATOMIC_WAIT_SRC_CONTENTS) + string(REPLACE "LIBRARYNAME" "${_ATOMIC_WAIT_OUTPUT_NAME_UPPER}" _ATOMIC_WAIT_DEF_CONTENTS "${_ATOMIC_WAIT_SRC_CONTENTS}") + file(WRITE "${_ATOMIC_WAIT_DEF_NAME}" "${_ATOMIC_WAIT_DEF_CONTENTS}") + + add_library(msvcp${D_SUFFIX}_atomic_wait SHARED "${_ATOMIC_WAIT_DEF_NAME}") + target_link_libraries(msvcp${D_SUFFIX}_atomic_wait PRIVATE msvcp${D_SUFFIX}_atomic_wait_objects "msvcp${D_SUFFIX}" "${TOOLSET_LIB}/vcruntime${D_SUFFIX}.lib" "${TOOLSET_LIB}/msvcrt${D_SUFFIX}.lib" "ucrt${D_SUFFIX}.lib") + set_target_properties(msvcp${D_SUFFIX}_atomic_wait PROPERTIES ARCHIVE_OUTPUT_NAME "msvcp140_atomic_wait${D_SUFFIX}${VCLIBS_SUFFIX}") + set_target_properties(msvcp${D_SUFFIX}_atomic_wait PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") + set_target_properties(msvcp${D_SUFFIX}_atomic_wait PROPERTIES OUTPUT_NAME "${_ATOMIC_WAIT_OUTPUT_NAME}") + # msvcp140_codecvt_ids.dll add_library(msvcp${D_SUFFIX}_codecvt_ids_objects OBJECT ${SOURCES_SATELLITE_CODECVT_IDS}) target_compile_definitions(msvcp${D_SUFFIX}_codecvt_ids_objects PRIVATE "_BUILDING_SATELLITE_CODECVT_IDS;_DLL;${THIS_CONFIG_DEFINITIONS}") @@ -474,8 +501,8 @@ function(add_stl_dlls D_SUFFIX THIS_CONFIG_DEFINITIONS THIS_CONFIG_COMPILE_OPTIO # import library add_library(msvcp${D_SUFFIX}_implib STATIC ${HEADERS}) target_link_libraries(msvcp${D_SUFFIX}_implib msvcp${D_SUFFIX}_implib_objects std_init_once_begin_initialize std_init_once_complete) - add_dependencies(msvcp${D_SUFFIX}_implib msvcp${D_SUFFIX} msvcp_1${D_SUFFIX} msvcp_2${D_SUFFIX} msvcp${D_SUFFIX}_codecvt_ids) - set_target_properties(msvcp${D_SUFFIX}_implib PROPERTIES STATIC_LIBRARY_OPTIONS "/NOLOGO;/NODEFAULTLIB;/IGNORE:4006;$;$;$;$") + add_dependencies(msvcp${D_SUFFIX}_implib msvcp${D_SUFFIX} msvcp_1${D_SUFFIX} msvcp_2${D_SUFFIX} msvcp${D_SUFFIX}_atomic_wait msvcp${D_SUFFIX}_codecvt_ids) + set_target_properties(msvcp${D_SUFFIX}_implib PROPERTIES STATIC_LIBRARY_OPTIONS "/NOLOGO;/NODEFAULTLIB;/IGNORE:4006;$;$;$;$;$") set_target_properties(msvcp${D_SUFFIX}_implib PROPERTIES ARCHIVE_OUTPUT_NAME "msvcprt${D_SUFFIX}") endfunction() diff --git a/stl/inc/atomic b/stl/inc/atomic index 80377e2c72..633280f5d5 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -17,6 +17,9 @@ #include #include #include +#if _HAS_CXX20 +#include +#endif // _HAS_CXX20 #pragma pack(push, _CRT_PACKING) #pragma warning(push, _STL_WARNING_LEVEL) @@ -357,6 +360,83 @@ template #else // ^^^ don't break ABI / break ABI vvv 
 template <class _Ty, size_t = _Atomic_storage_traits<_Ty>::_Storage_size>
 #endif // TRANSITION, ABI
+struct _Atomic_storage;
+
+#if _HAS_CXX20
+template <class _Ty, class _Value_type>
+void _Atomic_wait_direct(
+    const _Atomic_storage<_Ty>* const _This, _Value_type _Expected_bytes, const memory_order _Order) noexcept {
+    const auto _Storage_ptr = _STD addressof(_This->_Storage);
+    for (;;) {
+        const _Value_type _Observed_bytes = _Atomic_reinterpret_as<_Value_type>(_This->load(_Order));
+        if (_Expected_bytes != _Observed_bytes) {
+#if _CMPXCHG_MASK_OUT_PADDING_BITS
+            if constexpr (_Might_have_non_value_bits<_Ty>) {
+                _Storage_for<_Ty> _Mask{_Form_mask};
+                const _Value_type _Mask_val = _Atomic_reinterpret_as<_Value_type>(_Mask._Ref());
+
+                if (((_Expected_bytes ^ _Observed_bytes) & _Mask_val) == 0) {
+                    _Expected_bytes = _Observed_bytes;
+                    continue;
+                }
+            }
+#endif // _CMPXCHG_MASK_OUT_PADDING_BITS
+
+            return;
+        }
+
+        __std_atomic_wait_direct(_Storage_ptr, &_Expected_bytes, sizeof(_Value_type), _Atomic_wait_no_timeout);
+    }
+}
+#endif // _HAS_CXX20
+
+#if 1 // TRANSITION, ABI
+inline void _Atomic_lock_spinlock(long& _Spinlock) noexcept {
+    while (_InterlockedExchange(&_Spinlock, 1)) {
+        _YIELD_PROCESSOR();
+    }
+}
+
+inline void _Atomic_unlock_spinlock(long& _Spinlock) noexcept {
+#if defined(_M_ARM) || defined(_M_ARM64)
+    _Memory_barrier();
+    __iso_volatile_store32(reinterpret_cast<volatile int*>(&_Spinlock), 0);
+    _Memory_barrier();
+#else // ^^^ ARM32/ARM64 hardware / x86/x64 hardware vvv
+    _InterlockedExchange(&_Spinlock, 0);
+#endif // hardware
+}
+
+class _Spinlock_guard {
+public:
+    explicit _Spinlock_guard(long& _Spinlock_) noexcept : _Spinlock(_Spinlock_) {
+        _Atomic_lock_spinlock(_Spinlock);
+    }
+
+    ~_Spinlock_guard() {
+        _Atomic_unlock_spinlock(_Spinlock);
+    }
+
+    _Spinlock_guard(const _Spinlock_guard&) = delete;
+    _Spinlock_guard& operator=(const _Spinlock_guard&) = delete;
+
+private:
+    long& _Spinlock;
+};
+
+#if _HAS_CXX20
+inline bool __stdcall _Atomic_wait_compare_non_lock_free(
+    const void* _Storage, void* _Comparand, size_t _Size, void* _Spinlock_raw) noexcept {
+    long& _Spinlock = *static_cast<long*>(_Spinlock_raw);
+    _Atomic_lock_spinlock(_Spinlock);
+    const auto _Cmp_result = _CSTD memcmp(_Storage, _Comparand, _Size);
+    _Atomic_unlock_spinlock(_Spinlock);
+    return _Cmp_result == 0;
+}
+#endif // _HAS_CXX20
+#endif // TRANSITION, ABI
+
+template <class _Ty, size_t>
 struct _Atomic_storage {
     // Provides operations common to all specializations of std::atomic, load, store, exchange, and CAS.
     // Locking version used when hardware has no atomic operations for sizeof(_Ty).
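Below is a minimal, self-contained sketch (illustrative only, not the STL's actual code) of the retry loop that _Atomic_wait_direct implements above. wait() is specified to compare value representations, hence the memcmp; and because the loop always re-loads and re-compares, spurious wakeups from the underlying blocking call are harmless. std::this_thread::yield() stands in here for the blocking __std_atomic_wait_direct call (WaitOnAddress or the SRWLOCK fallback).

    #include <atomic>
    #include <cstring>
    #include <thread>

    template <class T>
    void wait_sketch(const std::atomic<T>& a, const T expected) {
        for (;;) {
            const T observed = a.load(); // seq_cst load, like wait()'s default
            if (std::memcmp(&observed, &expected, sizeof(T)) != 0) {
                return; // value representation changed; the wait is satisfied
            }
            std::this_thread::yield(); // stand-in for the blocking wait
        }
    }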
@@ -427,21 +507,59 @@ struct _Atomic_storage { return _Result; } +#if _HAS_CXX20 + void wait(_Ty _Expected, memory_order = memory_order_seq_cst) const noexcept { + const auto _Storage_ptr = _STD addressof(_Storage); + const auto _Expected_ptr = _STD addressof(_Expected); + for (;;) { + { + _Spinlock_guard _Lock{_Spinlock}; + if (_CSTD memcmp(_Storage_ptr, _Expected_ptr, sizeof(_Ty)) != 0) { + // contents differed, we might be done, check for padding +#if _CMPXCHG_MASK_OUT_PADDING_BITS + if constexpr (_Might_have_non_value_bits<_Ty>) { + _Storage_for<_Ty> _Local; + const auto _Local_ptr = _Local._Ptr(); + _CSTD memcpy(_Local_ptr, _Storage_ptr, sizeof(_Ty)); + __builtin_zero_non_value_bits(_Local_ptr); + __builtin_zero_non_value_bits(_Expected_ptr); + if (_CSTD memcmp(_Local_ptr, _Expected_ptr, sizeof(_Ty)) == 0) { + // _Storage differs from _Expected only by padding; copy the padding from _Storage into + // _Expected + _CSTD memcpy(_Expected_ptr, _Storage_ptr, sizeof(_Ty)); + } else { + // truly different, we're done + return; + } + } else +#endif // #if _CMPXCHG_MASK_OUT_PADDING_BITS + { + return; + } + } + } // unlock + + __std_atomic_wait_indirect(_Storage_ptr, _Expected_ptr, sizeof(_Ty), &_Spinlock, + &_Atomic_wait_compare_non_lock_free, _Atomic_wait_no_timeout); + } + } + + void notify_one() noexcept { + __std_atomic_notify_one_indirect(_STD addressof(_Storage)); + } + + void notify_all() noexcept { + __std_atomic_notify_all_indirect(_STD addressof(_Storage)); + } +#endif // _HAS_CXX20 + #if 1 // TRANSITION, ABI void _Lock() const noexcept { // lock the spinlock - while (_InterlockedExchange(&_Spinlock, 1)) { - _YIELD_PROCESSOR(); - } + _Atomic_lock_spinlock(_Spinlock); } void _Unlock() const noexcept { // unlock the spinlock -#if defined(_M_ARM) || defined(_M_ARM64) - _Memory_barrier(); - __iso_volatile_store32(reinterpret_cast(&_Spinlock), 0); - _Memory_barrier(); -#else // ^^^ ARM32/ARM64 hardware / x86/x64 hardware vvv - _InterlockedExchange(&_Spinlock, 0); -#endif // hardware + _Atomic_unlock_spinlock(_Spinlock); } private: @@ -452,23 +570,15 @@ public: #else // ^^^ don't break ABI / break ABI vvv void _Lock() const noexcept { // lock the spinlock - while (_InterlockedExchange8(&_Spinlock, 1)) { - _YIELD_PROCESSOR(); - } + _Smtx_lock_exclusive(&_Spinlock); } void _Unlock() const noexcept { // unlock the spinlock -#if defined(_M_ARM) || defined(_M_ARM64) - _Memory_barrier(); - __iso_volatile_store8(&_Spinlock, 0); - _Memory_barrier(); -#else // ^^^ ARM32/ARM64 hardware / x86/x64 hardware vvv - _InterlockedExchange8(&_Spinlock, 0); -#endif // hardware + _Smtx_unlock_exclusive(&_Spinlock); } _Ty _Storage; - mutable char _Spinlock = 0; + mutable _Smtx_t _Spinlock = 0; #endif // TRANSITION, ABI }; @@ -572,6 +682,20 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics return false; } +#if _HAS_CXX20 + void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); + } + + void notify_one() noexcept { + __std_atomic_notify_one_direct(_STD addressof(_Storage)); + } + + void notify_all() noexcept { + __std_atomic_notify_all_direct(_STD addressof(_Storage)); + } +#endif // _HAS_CXX20 + _Atomic_padded<_Ty> _Storage; }; @@ -674,6 +798,20 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics return false; } +#if _HAS_CXX20 + void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + _Atomic_wait_direct(this, 
_Atomic_reinterpret_as(_Expected), _Order); + } + + void notify_one() noexcept { + __std_atomic_notify_one_direct(_STD addressof(_Storage)); + } + + void notify_all() noexcept { + __std_atomic_notify_all_direct(_STD addressof(_Storage)); + } +#endif // _HAS_CXX20 + _Atomic_padded<_Ty> _Storage; }; @@ -774,6 +912,20 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics return false; } +#if _HAS_CXX20 + void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); + } + + void notify_one() noexcept { + __std_atomic_notify_one_direct(_STD addressof(_Storage)); + } + + void notify_all() noexcept { + __std_atomic_notify_all_direct(_STD addressof(_Storage)); + } +#endif // _HAS_CXX20 + _Atomic_padded<_Ty> _Storage; }; @@ -903,6 +1055,20 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics return false; } +#if _HAS_CXX20 + void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + _Atomic_wait_direct(this, _Atomic_reinterpret_as(_Expected), _Order); + } + + void notify_one() noexcept { + __std_atomic_notify_one_direct(_STD addressof(_Storage)); + } + + void notify_all() noexcept { + __std_atomic_notify_all_direct(_STD addressof(_Storage)); + } +#endif // _HAS_CXX20 + _Atomic_padded<_Ty> _Storage; }; @@ -998,6 +1164,16 @@ struct _Atomic_storage<_Ty, 16> { // lock-free using 16-byte intrinsics return _Result != 0; } +#if _HAS_CXX20 + void notify_one() noexcept { + __std_atomic_notify_one_indirect(_STD addressof(_Storage)); + } + + void notify_all() noexcept { + __std_atomic_notify_all_indirect(_STD addressof(_Storage)); + } +#endif // _HAS_CXX20 + struct _Int128 { alignas(16) long long _Low; long long _High; @@ -1802,6 +1978,23 @@ public: return this->compare_exchange_strong(_Expected, _Desired, _Combine_cas_memory_orders(_Success, _Failure)); } +#if _HAS_CXX20 + using _Base::wait; + void wait(const _Ty _Expected, const memory_order _Order = memory_order_seq_cst) const volatile noexcept { + const_cast(this)->_Base::wait(_Expected, _Order); + } + + using _Base::notify_one; + void notify_one() volatile noexcept { + const_cast(this)->_Base::notify_one(); + } + + using _Base::notify_all; + void notify_all() volatile noexcept { + const_cast(this)->_Base::notify_all(); + } +#endif // _HAS_CXX20 + operator _Ty() const volatile noexcept { static_assert(_Deprecate_non_lock_free_volatile<_Ty>, "Never fails"); return this->load(); @@ -2088,6 +2281,52 @@ _Ty atomic_fetch_xor_explicit( return _Mem->fetch_xor(_Value, _Order); } +#if _HAS_CXX20 +template +void atomic_wait(const volatile atomic<_Ty>* const _Mem, const typename atomic<_Ty>::value_type _Expected) noexcept +/* strengthened */ { + _Mem->wait(_Expected); +} + +template +void atomic_wait(const atomic<_Ty>* const _Mem, const typename atomic<_Ty>::value_type _Expected) noexcept +/* strengthened */ { + _Mem->wait(_Expected); +} + +template +void atomic_wait_explicit(const volatile atomic<_Ty>* const _Mem, const typename atomic<_Ty>::value_type _Expected, + const memory_order _Order) noexcept /* strengthened */ { + _Mem->wait(_Expected, _Order); +} + +template +void atomic_wait_explicit(const atomic<_Ty>* const _Mem, const typename atomic<_Ty>::value_type _Expected, + const memory_order _Order) noexcept /* strengthened */ { + _Mem->wait(_Expected, _Order); +} + +template +void atomic_notify_one(volatile atomic<_Ty>* const _Mem) noexcept /* strengthened */ { + 
_Mem->notify_one(); +} + +template +void atomic_notify_one(atomic<_Ty>* const _Mem) noexcept /* strengthened */ { + _Mem->notify_one(); +} + +template +void atomic_notify_all(volatile atomic<_Ty>* const _Mem) noexcept /* strengthened */ { + _Mem->notify_all(); +} + +template +void atomic_notify_all(atomic<_Ty>* const _Mem) noexcept /* strengthened */ { + _Mem->notify_all(); +} +#endif // _HAS_CXX20 + // ATOMIC TYPEDEFS using atomic_bool = atomic; @@ -2183,6 +2422,32 @@ struct atomic_flag { // flag with test-and-set semantics constexpr atomic_flag() noexcept = default; +#if _HAS_CXX20 + void wait(const bool _Expected, const memory_order _Order = memory_order_seq_cst) const noexcept { + _Storage.wait(static_cast(_Expected), _Order); + } + + void wait(const bool _Expected, const memory_order _Order = memory_order_seq_cst) const volatile noexcept { + _Storage.wait(static_cast(_Expected), _Order); + } + + void notify_one() noexcept { + _Storage.notify_one(); + } + + void notify_one() volatile noexcept { + _Storage.notify_one(); + } + + void notify_all() noexcept { + _Storage.notify_all(); + } + + void notify_all() volatile noexcept { + _Storage.notify_all(); + } +#endif // _HAS_CXX20 + #if 1 // TRANSITION, ABI atomic _Storage; #else // ^^^ don't break ABI / break ABI vvv @@ -2211,38 +2476,74 @@ _NODISCARD inline bool atomic_flag_test_explicit(const atomic_flag* const _Flag, } #endif // _HAS_CXX20 -inline bool atomic_flag_test_and_set(atomic_flag* _Flag) noexcept { +inline bool atomic_flag_test_and_set(atomic_flag* const _Flag) noexcept { return _Flag->test_and_set(); } -inline bool atomic_flag_test_and_set(volatile atomic_flag* _Flag) noexcept { +inline bool atomic_flag_test_and_set(volatile atomic_flag* const _Flag) noexcept { return _Flag->test_and_set(); } -inline bool atomic_flag_test_and_set_explicit(atomic_flag* _Flag, memory_order _Order) noexcept { +inline bool atomic_flag_test_and_set_explicit(atomic_flag* const _Flag, const memory_order _Order) noexcept { return _Flag->test_and_set(_Order); } -inline bool atomic_flag_test_and_set_explicit(volatile atomic_flag* _Flag, memory_order _Order) noexcept { +inline bool atomic_flag_test_and_set_explicit(volatile atomic_flag* const _Flag, const memory_order _Order) noexcept { return _Flag->test_and_set(_Order); } -inline void atomic_flag_clear(atomic_flag* _Flag) noexcept { +inline void atomic_flag_clear(atomic_flag* const _Flag) noexcept { _Flag->clear(); } -inline void atomic_flag_clear(volatile atomic_flag* _Flag) noexcept { +inline void atomic_flag_clear(volatile atomic_flag* const _Flag) noexcept { _Flag->clear(); } -inline void atomic_flag_clear_explicit(atomic_flag* _Flag, memory_order _Order) noexcept { +inline void atomic_flag_clear_explicit(atomic_flag* const _Flag, const memory_order _Order) noexcept { _Flag->clear(_Order); } -inline void atomic_flag_clear_explicit(volatile atomic_flag* _Flag, memory_order _Order) noexcept { +inline void atomic_flag_clear_explicit(volatile atomic_flag* const _Flag, const memory_order _Order) noexcept { _Flag->clear(_Order); } +#if _HAS_CXX20 +inline void atomic_flag_wait(const volatile atomic_flag* const _Flag, const bool _Expected) noexcept { + return _Flag->wait(_Expected); +} + +inline void atomic_flag_wait(const atomic_flag* const _Flag, const bool _Expected) noexcept { + return _Flag->wait(_Expected); +} + +inline void atomic_flag_wait_explicit( + const volatile atomic_flag* const _Flag, const bool _Expected, const memory_order _Order) noexcept { + return _Flag->wait(_Expected, _Order); +} + 
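As a usage sketch of the C++20 surface added here (illustrative only, not part of the patch): one thread blocks in atomic_flag::wait until another thread sets the flag and notifies it.

    #include <atomic>
    #include <thread>

    int main() {
        std::atomic_flag done{}; // C++20: value-initialized to clear (false)

        std::thread worker([&] {
            // ... produce a result ...
            done.test_and_set(); // publish completion
            done.notify_one();   // wake the waiter
        });

        done.wait(false); // blocks while the flag still reads false
        worker.join();
    }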
+inline void atomic_flag_wait_explicit( + const atomic_flag* const _Flag, const bool _Expected, const memory_order _Order) noexcept { + return _Flag->wait(_Expected, _Order); +} + +inline void atomic_flag_notify_one(volatile atomic_flag* const _Flag) noexcept { + return _Flag->notify_one(); +} + +inline void atomic_flag_notify_one(atomic_flag* const _Flag) noexcept { + return _Flag->notify_one(); +} + +inline void atomic_flag_notify_all(volatile atomic_flag* const _Flag) noexcept { + return _Flag->notify_all(); +} + +inline void atomic_flag_notify_all(atomic_flag* const _Flag) noexcept { + return _Flag->notify_all(); +} +#endif // _HAS_CXX20 + _STD_END #undef _CMPXCHG_MASK_OUT_PADDING_BITS diff --git a/stl/inc/execution b/stl/inc/execution index 728cc71f91..7fea657d08 100644 --- a/stl/inc/execution +++ b/stl/inc/execution @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -29,11 +30,6 @@ _STL_DISABLE_CLANG_WARNINGS #undef new _EXTERN_C -// If on Windows XP, returns 1 (disabling parallelism); otherwise, returns the number of hardware threads available. -_NODISCARD unsigned int __stdcall __std_parallel_algorithms_hw_threads() noexcept; - -// Windows Vista thread pool interface; __std_parallel_algorithms_hw_threads must be called on the current -// thread before calling any of the below. #ifdef _M_CEE using __std_TP_WORK = void; using __std_TP_CALLBACK_INSTANCE = void; @@ -1117,7 +1113,7 @@ struct _Static_partitioned_all_of_family2 { // all_of/any_of/none_of task schedu template bool _All_of_family_parallel(_FwdIt _First, const _FwdIt _Last, _Pr _Pred) { // test if all elements in [_First, _Last) satisfy _Pred (or !_Pred if _Invert is true) in parallel - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... const auto _Count = _STD distance(_First, _Last); if (_Count >= 2) { // ... with at least 2 elements @@ -1228,7 +1224,7 @@ void for_each(_ExPo&&, _FwdIt _First, _FwdIt _Last, _Fn _Func) noexcept /* termi auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 2) { // ... 
with at least 2 elements @@ -1275,7 +1271,7 @@ _FwdIt for_each_n(_ExPo&&, _FwdIt _First, const _Diff _Count_raw, _Fn _Func) noe if (0 < _Count) { auto _UFirst = _Get_unwrapped_n(_First, _Count); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1 && _Count >= 2) { // parallelize on multiprocessor machines with at least 2 elements _TRY_BEGIN auto _Passed_fn = _Pass_fn(_Func); @@ -1351,7 +1347,7 @@ template _FwdIt _Find_parallel_unchecked(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, const _Find_fx _Fx) { // find first matching _Val, potentially in parallel if (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_First, _Last); if (_Count >= 2) { @@ -1566,7 +1562,7 @@ _NODISCARD _FwdIt1 find_end(_ExPo&&, _FwdIt1 _First1, const _FwdIt1 _Last1, cons const auto _UFirst2 = _Get_unwrapped(_First2); const auto _ULast2 = _Get_unwrapped(_Last2); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { if constexpr (_Is_bidi_iter_v<_FwdIt1>) { const auto _Partition_start = @@ -1686,7 +1682,7 @@ _NODISCARD _FwdIt adjacent_find(_ExPo&&, _FwdIt _First, _FwdIt _Last, _Pr _Pred) auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { const auto _Count = static_cast<_Iter_diff_t<_FwdIt>>(_STD distance(_UFirst, _ULast) - 1); if (_Count >= 2) { @@ -1747,7 +1743,7 @@ _NODISCARD _Iter_diff_t<_FwdIt> count_if(_ExPo&&, const _FwdIt _First, const _Fw auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 2) { @@ -1911,7 +1907,7 @@ _NODISCARD pair<_FwdIt1, _FwdIt2> mismatch( const auto _UFirst1 = _Get_unwrapped(_First1); const auto _ULast1 = _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_UFirst1, _ULast1); const auto _UFirst2 = _Get_unwrapped_n(_First2, _Count); @@ -1956,7 +1952,7 @@ _NODISCARD pair<_FwdIt1, _FwdIt2> mismatch( const auto _UFirst2 = _Get_unwrapped(_First2); const auto _ULast2 = _Get_unwrapped(_Last2); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { const auto _Count = static_cast<_Iter_diff_t<_FwdIt1>>(_Distance_min(_UFirst1, _ULast1, _UFirst2, _ULast2)); if (_Count >= 2) { @@ -2031,7 +2027,7 @@ _NODISCARD bool equal(_ExPo&&, const _FwdIt1 _First1, const _FwdIt1 _Last1, cons const auto _UFirst1 = 
_Get_unwrapped(_First1); const auto _ULast1 = _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_UFirst1, _ULast1); const auto _UFirst2 = _Get_unwrapped_n(_First2, _Count); @@ -2067,7 +2063,7 @@ _NODISCARD bool equal(_ExPo&&, const _FwdIt1 _First1, const _FwdIt1 _Last1, cons const auto _UFirst2 = _Get_unwrapped(_First2); const auto _ULast2 = _Get_unwrapped(_Last2); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { const auto _Count = _Distance_any(_UFirst1, _ULast1, _UFirst2, _ULast2); if (_Count >= 2) { @@ -2157,7 +2153,7 @@ _NODISCARD _FwdItHaystack search(_ExPo&&, const _FwdItHaystack _First1, _FwdItHa const auto _ULast1 = _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { _Iter_diff_t<_FwdItHaystack> _Count; if constexpr (_Is_random_iter_v<_FwdItHaystack> && _Is_random_iter_v<_FwdItPat>) { @@ -2288,7 +2284,7 @@ _NODISCARD _FwdIt search_n(_ExPo&&, const _FwdIt _First, _FwdIt _Last, const _Di auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { const auto _Haystack_count = _STD distance(_UFirst, _ULast); if (_Count > _Haystack_count) { @@ -2356,7 +2352,7 @@ _FwdIt2 transform(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, _FwdIt2 _D auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... const auto _Count = _STD distance(_UFirst, _ULast); const auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -2436,7 +2432,7 @@ _FwdIt3 transform(_ExPo&&, const _FwdIt1 _First1, const _FwdIt1 _Last1, const _F const auto _UFirst1 = _Get_unwrapped(_First1); const auto _ULast1 = _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... 
const auto _Count = _STD distance(_UFirst1, _ULast1); const auto _UFirst2 = _Get_unwrapped_n(_First2, _Count); @@ -2616,7 +2612,7 @@ _NODISCARD _FwdIt remove_if(_ExPo&&, _FwdIt _First, const _FwdIt _Last, _Pr _Pre auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 2) { @@ -2758,7 +2754,7 @@ void sort(_ExPo&&, const _RanIt _First, const _RanIt _Last, _Pr _Pred) noexcept const _Iter_diff_t<_RanIt> _Ideal = _ULast - _UFirst; if constexpr (remove_reference_t<_ExPo>::_Parallelize) { size_t _Threads; - if (_Ideal > _ISORT_MAX && (_Threads = __std_parallel_algorithms_hw_threads()) > 1) { + if (_Ideal > _ISORT_MAX && (_Threads = thread::hardware_concurrency()) > 1) { // parallelize when input is large enough and we aren't on a uniprocessor machine _TRY_BEGIN _Sort_operation _Operation(_UFirst, _Pass_fn(_Pred), _Threads, _Ideal); // throws @@ -3022,7 +3018,7 @@ void stable_sort(_ExPo&&, const _BidIt _First, const _BidIt _Last, _Pr _Pred) no size_t _Hw_threads; bool _Attempt_parallelism; if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - _Hw_threads = __std_parallel_algorithms_hw_threads(); + _Hw_threads = thread::hardware_concurrency(); _Attempt_parallelism = _Hw_threads > 1; } else { _Attempt_parallelism = false; @@ -3103,7 +3099,7 @@ _NODISCARD _FwdIt is_sorted_until(_ExPo&&, _FwdIt _First, _FwdIt _Last, _Pr _Pre const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 3) { // ... with at least 3 elements @@ -3258,7 +3254,7 @@ _NODISCARD bool is_partitioned(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 2) { // ... with at least 2 elements @@ -3331,7 +3327,7 @@ _NODISCARD _RanIt is_heap_until(_ExPo&&, _RanIt _First, _RanIt _Last, _Pr _Pred) const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _ULast - _UFirst; if (_Count >= 3) { // ... 
with at least 3 elements @@ -3580,7 +3576,7 @@ _FwdIt partition(_ExPo&&, _FwdIt _First, const _FwdIt _Last, _Pr _Pred) noexcept const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 2) { @@ -3922,7 +3918,7 @@ _FwdIt3 set_intersection(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt2 _Firs if constexpr (remove_reference_t<_ExPo>::_Parallelize && _Is_random_iter_v<_FwdIt1> && _Is_random_iter_v<_FwdIt2> && _Is_random_iter_v<_FwdIt3>) { // only parallelize if desired, and all of the iterators given are random access - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const _Diff _Count1 = _ULast1 - _UFirst1; const _Diff _Count2 = _ULast2 - _UFirst2; @@ -4013,7 +4009,7 @@ _FwdIt3 set_difference(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt2 _First2 if constexpr (remove_reference_t<_ExPo>::_Parallelize && _Is_random_iter_v<_FwdIt1> && _Is_random_iter_v<_FwdIt2> && _Is_random_iter_v<_FwdIt3>) { // only parallelize if desired, and all of the iterators given are random access - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const _Diff _Count = _ULast1 - _UFirst1; if (_Count >= 2) { // ... with at least 2 elements in [_First1, _Last1) @@ -4107,7 +4103,7 @@ _NODISCARD _Ty reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, _Ty _Val auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... const auto _Count = _STD distance(_UFirst, _ULast); const auto _Chunks = _Get_least2_chunked_work_chunk_count(_Hw_threads, _Count); @@ -4209,7 +4205,7 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt auto _UFirst1 = _Get_unwrapped(_First1); const auto _ULast1 = _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... const auto _Count = _STD distance(_UFirst1, _ULast1); auto _UFirst2 = _Get_unwrapped_n(_First2, _Count); @@ -4304,7 +4300,7 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... 
const auto _Count = _STD distance(_UFirst, _ULast); const auto _Chunks = _Get_least2_chunked_work_chunk_count(_Hw_threads, _Count); @@ -4456,7 +4452,7 @@ _FwdIt2 exclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, _FwdI const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); const auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -4601,7 +4597,7 @@ _FwdIt2 inclusive_scan(_ExPo&&, _FwdIt1 _First, _FwdIt1 _Last, _FwdIt2 _Dest, _B const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_First, _Last); auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -4644,7 +4640,7 @@ _FwdIt2 inclusive_scan(_ExPo&&, _FwdIt1 _First, _FwdIt1 _Last, _FwdIt2 _Dest, _B const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -4789,7 +4785,7 @@ _FwdIt2 transform_exclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); const auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -4936,7 +4932,7 @@ _FwdIt2 transform_inclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -4982,7 +4978,7 @@ _FwdIt2 transform_inclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -5084,7 +5080,7 @@ _FwdIt2 adjacent_difference(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, auto _UFirst = _Get_unwrapped(_First); const auto _ULast = 
_Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); + const size_t _Hw_threads = thread::hardware_concurrency(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines auto _Count = _STD distance(_UFirst, _ULast); const auto _UDest = _Get_unwrapped_n(_Dest, _Count); diff --git a/stl/inc/memory b/stl/inc/memory index 5b65f83450..67b1cc410d 100644 --- a/stl/inc/memory +++ b/stl/inc/memory @@ -3100,6 +3100,15 @@ _CXX20_DEPRECATE_OLD_SHARED_PTR_ATOMIC_SUPPORT bool atomic_compare_exchange_stro template class alignas(2 * sizeof(void*)) _Atomic_ptr_base { // overalignment is to allow potential future use of cmpxchg16b + + static_assert(alignof(_Ref_count_base) >= (1 << 2), "Two bits don't fit as low bits"); + + static constexpr uintptr_t _Lock_mask = 3; + static constexpr uintptr_t _Not_locked = 0; + static constexpr uintptr_t _Locked_notify_not_needed = 1; + static constexpr uintptr_t _Locked_notify_needed = 2; + static constexpr uintptr_t _Ptr_value_mask = ~_Lock_mask; + protected: constexpr _Atomic_ptr_base() noexcept = default; @@ -3107,20 +3116,42 @@ protected: : _Ptr(_Px), _Repptr(reinterpret_cast(_Ref)) {} _NODISCARD _Ref_count_base* _Lock_and_load() const noexcept { - constexpr uintptr_t _Low_bit = 1; - uintptr_t _Rep = _Repptr.load(memory_order::relaxed); + uintptr_t _Rep = _Repptr.load(memory_order::relaxed); for (;;) { - _Rep &= ~_Low_bit; - if (_Repptr.compare_exchange_weak(_Rep, _Rep | _Low_bit)) { - return reinterpret_cast<_Ref_count_base*>(_Rep); + switch (_Rep & _Lock_mask) { + case _Not_locked: // Can try to lock now + if (_Repptr.compare_exchange_weak(_Rep, _Rep | _Locked_notify_not_needed)) { + return reinterpret_cast<_Ref_count_base*>(_Rep); + } + _YIELD_PROCESSOR(); + break; + + case _Locked_notify_not_needed: // Try to set "notify needed" and wait + if (!_Repptr.compare_exchange_weak(_Rep, (_Rep & _Ptr_value_mask) | _Locked_notify_needed)) { + // Failed to put notify needed flag on, try again + _YIELD_PROCESSOR(); + break; + } + _Rep = (_Rep & _Ptr_value_mask) | _Locked_notify_needed; + [[fallthrough]]; + + case _Locked_notify_needed: // "Notify needed" is already set, just wait + _Repptr.wait(_Rep, memory_order::relaxed); + _Rep = _Repptr.load(memory_order::relaxed); + break; + + default: // Unrecognized bit pattern + _CSTD abort(); } - - _YIELD_PROCESSOR(); } } void _Store_and_unlock(_Ref_count_base* const _Value) const noexcept { - _Repptr.store(reinterpret_cast(_Value)); + uintptr_t _Rep = _Repptr.exchange(reinterpret_cast(_Value)); + if ((_Rep & _Lock_mask) == _Locked_notify_needed) { + // As we don't count waiters, every waiter is notified, and then some may re-request notification + _Repptr.notify_all(); + } } _Ty* _Ptr = nullptr; diff --git a/stl/inc/xatomic_wait.h b/stl/inc/xatomic_wait.h new file mode 100644 index 0000000000..266aae0c9f --- /dev/null +++ b/stl/inc/xatomic_wait.h @@ -0,0 +1,72 @@ +// xatomic_wait.h internal header + +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#pragma once
+#ifndef _XATOMIC_WAIT_H
+#define _XATOMIC_WAIT_H
+#include <yvals.h>
+#if _STL_COMPILER_PREPROCESSOR
+
+#include <cstdlib>
+#include <xatomic.h>
+
+#pragma pack(push, _CRT_PACKING)
+#pragma warning(push, _STL_WARNING_LEVEL)
+#pragma warning(disable : _STL_DISABLED_WARNINGS)
+_STL_DISABLE_CLANG_WARNINGS
+#pragma push_macro("new")
+#undef new
+
+_INLINE_VAR constexpr unsigned long long _Atomic_wait_no_deadline = 0xFFFF'FFFF'FFFF'FFFF;
+_INLINE_VAR constexpr unsigned long _Atomic_wait_no_timeout = 0xFFFF'FFFF; // Pass as partial timeout
+
+_EXTERN_C
+enum class __std_atomic_api_level : unsigned long {
+    __not_set,
+    __detecting,
+    __has_srwlock,
+    __has_wait_on_address,
+};
+
+// This function allows testing the atomic wait support while always using the APIs for a platform with fewer
+// capabilities; it attempts to lock the APIs used to the level `_Requested_api_level`, and returns the actual API
+// level in use. Once the API level has been set by calling this function (or detected by a call to one of the atomic
+// wait functions), it can no longer be changed.
+__std_atomic_api_level __stdcall __std_atomic_set_api_level(__std_atomic_api_level _Requested_api_level) noexcept;
+
+// Support for atomic waits.
+// The "direct" functions are used when the underlying infrastructure can use WaitOnAddress directly; that is, _Size
+// is 1, 2, 4, or 8. The contract is the same as that of the WaitOnAddress function from the Windows SDK. If
+// WaitOnAddress is not available on the current platform, this falls back to a similar solution based on SRWLOCK and
+// CONDITION_VARIABLE.
+int __stdcall __std_atomic_wait_direct(
+    const void* _Storage, void* _Comparand, size_t _Size, unsigned long _Remaining_timeout) noexcept;
+void __stdcall __std_atomic_notify_one_direct(const void* _Storage) noexcept;
+void __stdcall __std_atomic_notify_all_direct(const void* _Storage) noexcept;
+
+// The "indirect" functions are used when the size is not 1, 2, 4, or 8; these notionally wait on another value that
+// is of one of those sizes and whose value changes upon notify, hence "indirect". (As of 2020-07-24, this always
+// uses the fallback SRWLOCK and CONDITION_VARIABLE implementation, but that is not contractual.)
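For concreteness, here is a sketch of a callback matching the _Atomic_wait_indirect_equal_callback_t shape declared just below, modeled on the _Atomic_wait_compare_non_lock_free helper shown in <atomic> above (illustrative only): _Param carries the spinlock that guards the non-lock-free atomic's storage, and the callback reports whether storage and comparand are still equal, i.e. whether the wait should continue.

    #include <cstring>
    #include <intrin.h>

    bool __stdcall _Are_equal_sketch(
        const void* _Storage, void* _Comparand, size_t _Size, void* _Param) noexcept {
        long& _Spinlock = *static_cast<long*>(_Param);
        while (_InterlockedExchange(&_Spinlock, 1)) { // acquire the spinlock
        }
        const bool _Equal = std::memcmp(_Storage, _Comparand, _Size) == 0;
        _InterlockedExchange(&_Spinlock, 0); // release the spinlock
        return _Equal; // true => unchanged, keep waiting
    }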
+using _Atomic_wait_indirect_equal_callback_t = bool(__stdcall*)(
+    const void* _Storage, void* _Comparand, size_t _Size, void* _Param) noexcept;
+
+int __stdcall __std_atomic_wait_indirect(const void* _Storage, void* _Comparand, size_t _Size, void* _Param,
+    _Atomic_wait_indirect_equal_callback_t _Are_equal, unsigned long _Remaining_timeout) noexcept;
+void __stdcall __std_atomic_notify_one_indirect(const void* _Storage) noexcept;
+void __stdcall __std_atomic_notify_all_indirect(const void* _Storage) noexcept;
+
+// These functions convert a duration into a time point in order to tolerate spurious wakes in atomic wait, and then
+// convert back from the time point to individual wait attempts (each of which is limited by its DWORD milliseconds
+// parameter to a length of ~49 days).
+unsigned long long __stdcall __std_atomic_wait_get_deadline(unsigned long long _Timeout) noexcept;
+unsigned long __stdcall __std_atomic_wait_get_remaining_timeout(unsigned long long _Deadline) noexcept;
+
+_END_EXTERN_C
+
+#pragma pop_macro("new")
+_STL_RESTORE_CLANG_WARNINGS
+#pragma warning(pop)
+#pragma pack(pop)
+#endif // _STL_COMPILER_PREPROCESSOR
+#endif // _XATOMIC_WAIT_H
diff --git a/stl/inc/yvals.h b/stl/inc/yvals.h
index 1a70aa17ea..ea0b53f457 100644
--- a/stl/inc/yvals.h
+++ b/stl/inc/yvals.h
@@ -306,6 +306,22 @@ _STL_DISABLE_CLANG_WARNINGS
 #define _LOCK_DEBUG 3
 #define _LOCK_AT_THREAD_EXIT 4
 
+#ifndef _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE
+#if _STL_WIN32_WINNT >= _STL_WIN32_WINNT_WIN8
+#define _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE 1
+#else // ^^^ _STL_WIN32_WINNT >= _STL_WIN32_WINNT_WIN8 / _STL_WIN32_WINNT < _STL_WIN32_WINNT_WIN8 vvv
+#define _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE 0
+#endif // ^^^ _STL_WIN32_WINNT < _STL_WIN32_WINNT_WIN8 ^^^
+#endif // _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE
+
+#ifndef _ALLOW_ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE_MISMATCH
+#if _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE
+#pragma detect_mismatch("_ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE", "1")
+#else
+#pragma detect_mismatch("_ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE", "0")
+#endif
+#endif // !_ALLOW_ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE_MISMATCH
+
 #ifdef __cplusplus
 _STD_BEGIN
 enum _Uninitialized { // tag for suppressing initialization
diff --git a/stl/inc/yvals_core.h b/stl/inc/yvals_core.h
index bc55f3dcc2..526ae51d63 100644
--- a/stl/inc/yvals_core.h
+++ b/stl/inc/yvals_core.h
@@ -1138,6 +1138,7 @@
 #define __cpp_lib_atomic_float 201711L
 #define __cpp_lib_atomic_lock_free_type_aliases 201907L
 #define __cpp_lib_atomic_shared_ptr 201711L
+#define __cpp_lib_atomic_wait 201907L
 #define __cpp_lib_bind_front 201907L
 #define __cpp_lib_bit_cast 201806L
 #define __cpp_lib_bitops 201907L
@@ -1261,5 +1262,20 @@ compiler option, or define _ALLOW_RTCc_IN_STL to acknowledge that you have recei
 #error In yvals_core.h, defined(MRTDLL) implies defined(_M_CEE_PURE); !defined(_M_CEE_PURE) implies !defined(MRTDLL)
 #endif // defined(MRTDLL) && !defined(_M_CEE_PURE)
 
+#define _STL_WIN32_WINNT_WINXP 0x0501 // _WIN32_WINNT_WINXP from sdkddkver.h
+#define _STL_WIN32_WINNT_VISTA 0x0600 // _WIN32_WINNT_VISTA from sdkddkver.h
+#define _STL_WIN32_WINNT_WIN8 0x0602 // _WIN32_WINNT_WIN8 from sdkddkver.h
+
+// Note that the STL DLL builds will set this to XP for ABI compatibility with VS2015, which supported XP.
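Taken together, these two headers work as follows: _STL_WIN32_WINNT selects the minimum OS the binary must run on, and when that minimum is Windows 8 or later, _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE lets the wait machinery call WaitOnAddress directly instead of probing for it at run time. A sketch of how the separately compiled code consumes the macro (illustrative only; the real dispatch lives in stl/src/atomic_wait.cpp below, where __crtWaitOnAddress is the GetProcAddress-backed fallback shim):

    #include <Windows.h>

    BOOL wait_dispatch_sketch(volatile long& _Value, long _Undesired) {
    #if _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE
        // Win8+ minimum: WaitOnAddress is imported statically (Synchronization.lib)
        return WaitOnAddress(&_Value, &_Undesired, sizeof(long), INFINITE);
    #else
        // downlevel: go through a GetProcAddress-populated function pointer,
        // here represented by the atomic_wait.cpp shim of the same name
        extern BOOL __crtWaitOnAddress(volatile VOID*, PVOID, SIZE_T, DWORD);
        return __crtWaitOnAddress(&_Value, &_Undesired, sizeof(long), INFINITE);
    #endif
    }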
+#ifndef _STL_WIN32_WINNT +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_ONECORE) || defined(_CRT_APP) +// The first ARM or OneCore or App Windows was Windows 8 +#define _STL_WIN32_WINNT _STL_WIN32_WINNT_WIN8 +#else // ^^^ default to Win8 // default to Vista vvv +// The earliest Windows supported by this implementation is Windows Vista +#define _STL_WIN32_WINNT _STL_WIN32_WINNT_VISTA +#endif // ^^^ !defined(_M_ARM) && !defined(_M_ARM64) && !defined(_ONECORE) && !defined(_CRT_APP) ^^^ +#endif // _STL_WIN32_WINNT + #endif // _STL_COMPILER_PREPROCESSOR #endif // _YVALS_CORE_H_ diff --git a/stl/msbuild/stl_atomic_wait/dirs.proj b/stl/msbuild/stl_atomic_wait/dirs.proj new file mode 100644 index 0000000000..7d349d37ff --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/dirs.proj @@ -0,0 +1,15 @@ + + + + + + + + + + + + diff --git a/stl/msbuild/stl_atomic_wait/md/dirs.proj b/stl/msbuild/stl_atomic_wait/md/dirs.proj new file mode 100644 index 0000000000..8376c0a191 --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/md/dirs.proj @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + diff --git a/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_app/msvcp_atomic_wait.nativeproj b/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_app/msvcp_atomic_wait.nativeproj new file mode 100644 index 0000000000..e22fbab9c1 --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_app/msvcp_atomic_wait.nativeproj @@ -0,0 +1,15 @@ + + + + + + md + app + + + + + diff --git a/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_kernel32/msvcp_atomic_wait.nativeproj b/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_kernel32/msvcp_atomic_wait.nativeproj new file mode 100644 index 0000000000..e009997783 --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_kernel32/msvcp_atomic_wait.nativeproj @@ -0,0 +1,15 @@ + + + + + + md + kernel32 + + + + + diff --git a/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_netfx/msvcp_atomic_wait.nativeproj b/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_netfx/msvcp_atomic_wait.nativeproj new file mode 100644 index 0000000000..5b3f631fc0 --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_netfx/msvcp_atomic_wait.nativeproj @@ -0,0 +1,15 @@ + + + + + + md + netfx + + + + + diff --git a/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_onecore/msvcp_atomic_wait.nativeproj b/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_onecore/msvcp_atomic_wait.nativeproj new file mode 100644 index 0000000000..774d8aa1b5 --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/md/msvcp_atomic_wait_onecore/msvcp_atomic_wait.nativeproj @@ -0,0 +1,15 @@ + + + + + + md + onecore + + + + + diff --git a/stl/msbuild/stl_atomic_wait/msvcp_atomic_wait.settings.targets b/stl/msbuild/stl_atomic_wait/msvcp_atomic_wait.settings.targets new file mode 100644 index 0000000000..9590f00030 --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/msvcp_atomic_wait.settings.targets @@ -0,0 +1,90 @@ + + + + + + p_atomic_wait + + true + true + true + + DYNLINK + + + + + + _clr + + _app + _clr$(NetFxVerX) + + msvcp$(VCToolsProdVerSuffix)$(BuildSuffix)_atomic_wait$(MsvcpFlavorSuffix) + msvcprt$(BuildSuffix)_atomic_wait$(ClrLibSuffix) + $(LibOutputFileName).lib + + _VCRT_ALLOW_INTERNALS;$(ClDefines) + $(ClDefines);_CRT_APP + + false + true + $(IntermediateOutputDirectory) + $(CrtBuildDir)\msvcprt_atomic_wait$(BuildSuffix).$(MsvcpFlavor).import_only.lib + $(LibOutputFileName).$(MsvcpFlavor) + $(IntermediateOutputDirectory)\$(DllDefName).def + + true + 
$(OutputPath)\$(OutputName)$(_PDB_VER_NAME_)$(DllPdbFlavorSuffix) + + -debugtype:cv,fixup $(LinkAdditionalOptions) + -opt:ref,icf=3 $(LinkAdditionalOptions) + -opt:ref,noicf $(LinkAdditionalOptions) + -nodefaultlib:libcpmt$(BuildSuffix).lib $(LinkAdditionalOptions) + -nodefaultlib:$(LibOutputFile) $(LinkAdditionalOptions) + + true + true + + + + + LIBRARYNAME=$(OutputName.ToUpper()) + + + $(IntermediateOutputDirectory) + $(IntermediateOutputDirectory) + $(DllDefName) + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/stl/msbuild/stl_atomic_wait/msvcprt_atomic_wait.rc b/stl/msbuild/stl_atomic_wait/msvcprt_atomic_wait.rc new file mode 100644 index 0000000000..b43a7e1238 --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/msvcprt_atomic_wait.rc @@ -0,0 +1,23 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +////////////////////////////////////////////////////////////////////////////////////// +// +// msvcprt_atomic_wait.rc : Defines the version resource for the C++ Runtime Library "_atomic_wait" DLL +// +////////////////////////////////////////////////////////////////////////////////////// + +#include "winver.h" // extract from windows header +#include "verstamp.h" + +#define MKARGSTR2(X) #X +#define MKARGSTR(X) MKARGSTR2(X) + +#define VER_FILETYPE VFT_DLL +#define VER_FILESUBTYPE VFT_UNKNOWN + +#define VER_FILEDESCRIPTION_STR "Microsoft\256 C Runtime Library _atomic_wait\0" +#define VER_INTERNALNAME_STR MKARGSTR(SXS_TARGET) +#define VER_ORIGINALFILENAME_STR MKARGSTR(SXS_TARGET) + +#include diff --git a/stl/msbuild/stl_atomic_wait/stl_atomic_wait.files.settings.targets b/stl/msbuild/stl_atomic_wait/stl_atomic_wait.files.settings.targets new file mode 100644 index 0000000000..0de6759e9c --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/stl_atomic_wait.files.settings.targets @@ -0,0 +1,15 @@ + + + + + + nativecpp + + + diff --git a/stl/msbuild/stl_atomic_wait/xmd/dirs.proj b/stl/msbuild/stl_atomic_wait/xmd/dirs.proj new file mode 100644 index 0000000000..c6bf75b366 --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/xmd/dirs.proj @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + diff --git a/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_app/msvcp_atomic_wait.nativeproj b/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_app/msvcp_atomic_wait.nativeproj new file mode 100644 index 0000000000..6d99c3ab36 --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_app/msvcp_atomic_wait.nativeproj @@ -0,0 +1,15 @@ + + + + + + xmd + app + + + + + diff --git a/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_kernel32/msvcp_atomic_wait.nativeproj b/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_kernel32/msvcp_atomic_wait.nativeproj new file mode 100644 index 0000000000..9fe52b880d --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_kernel32/msvcp_atomic_wait.nativeproj @@ -0,0 +1,15 @@ + + + + + + xmd + kernel32 + + + + + diff --git a/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_netfx/msvcp_atomic_wait.nativeproj b/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_netfx/msvcp_atomic_wait.nativeproj new file mode 100644 index 0000000000..e0629c9e50 --- /dev/null +++ b/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_netfx/msvcp_atomic_wait.nativeproj @@ -0,0 +1,15 @@ + + + + + + xmd + netfx + + + + + diff --git a/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_onecore/msvcp_atomic_wait.nativeproj b/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_onecore/msvcp_atomic_wait.nativeproj new file mode 
new file mode 100644
index 0000000000..54f964b787
--- /dev/null
+++ b/stl/msbuild/stl_atomic_wait/xmd/msvcp_atomic_wait_onecore/msvcp_atomic_wait.nativeproj
@@ -0,0 +1,15 @@
+
+
+
+
+
+    xmd
+    onecore
+
+
+
+
+
diff --git a/stl/msbuild/stl_base/stl.files.settings.targets b/stl/msbuild/stl_base/stl.files.settings.targets
index 4c371170a5..5fd1b26310
--- a/stl/msbuild/stl_base/stl.files.settings.targets
+++ b/stl/msbuild/stl_base/stl.files.settings.targets
@@ -12,8 +12,10 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
     nativecpp
@@ -170,7 +172,6 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
     $(CrtRoot)\github\stl\src\filesystem.cpp;
     $(CrtRoot)\github\stl\src\locale0_implib.cpp;
     $(CrtRoot)\github\stl\src\nothrow.cpp;
-    $(CrtRoot)\github\stl\src\parallel_algorithms.cpp;
     $(CrtRoot)\github\stl\src\sharedmutex.cpp;
     $(CrtRoot)\github\stl\src\syserror_import_lib.cpp;
     $(CrtRoot)\github\stl\src\vector_algorithms.cpp;
diff --git a/stl/msbuild/stl_post/msvcp_post.settings.targets b/stl/msbuild/stl_post/msvcp_post.settings.targets
index 6d09f6244f..3271bca0e9
--- a/stl/msbuild/stl_post/msvcp_post.settings.targets
+++ b/stl/msbuild/stl_post/msvcp_post.settings.targets
@@ -50,10 +50,13 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
              Include="$(CrtRoot)\github\stl\msbuild\stl_2\$(CrtBuildModel)\msvcp_2_$(MsvcpFlavor)\msvcp_2.nativeproj"/>
+
+
diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp
new file mode 100644
index 0000000000..d16f53f219
--- /dev/null
+++ b/stl/src/atomic_wait.cpp
@@ -0,0 +1,334 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// implement atomic wait / notify_one / notify_all
+
+// clang-format off
+#include <atomic>
+#include <cstdint>
+#include <cstdlib>
+#include <new>
+#include <xatomic_wait.h>
+
+#include <Windows.h>
+// clang-format on
+
+namespace {
+
+    constexpr size_t _Wait_table_size_power = 8;
+    constexpr size_t _Wait_table_size = 1 << _Wait_table_size_power;
+    constexpr size_t _Wait_table_index_mask = _Wait_table_size - 1;
+
+    struct _Wait_context {
+        const void* _Storage; // Pointer to wait on
+        _Wait_context* _Next;
+        _Wait_context* _Prev;
+        CONDITION_VARIABLE _Condition;
+    };
+
+    struct _Guarded_wait_context : _Wait_context {
+        _Guarded_wait_context(const void* _Storage_, _Wait_context* const _Head) noexcept
+            : _Wait_context{_Storage_, _Head, _Head->_Prev, CONDITION_VARIABLE_INIT} {
+            _Prev->_Next = this;
+            _Next->_Prev = this;
+        }
+
+        ~_Guarded_wait_context() {
+            const auto _Next_local = _Next;
+            const auto _Prev_local = _Prev;
+            _Next->_Prev = _Prev_local;
+            _Prev->_Next = _Next_local;
+        }
+
+        _Guarded_wait_context(const _Guarded_wait_context&) = delete;
+        _Guarded_wait_context& operator=(const _Guarded_wait_context&) = delete;
+    };
+
+    class _SrwLock_guard {
+    public:
+        explicit _SrwLock_guard(SRWLOCK& _Locked_) noexcept : _Locked(&_Locked_) {
+            AcquireSRWLockExclusive(_Locked);
+        }
+
+        ~_SrwLock_guard() {
+            ReleaseSRWLockExclusive(_Locked);
+        }
+
+        _SrwLock_guard(const _SrwLock_guard&) = delete;
+        _SrwLock_guard& operator=(const _SrwLock_guard&) = delete;
+
+    private:
+        SRWLOCK* _Locked;
+    };
+
+
+#pragma warning(push)
+#pragma warning(disable : 4324) // structure was padded due to alignment specifier
+    struct alignas(_STD hardware_destructive_interference_size) _Wait_table_entry {
+        SRWLOCK _Lock = SRWLOCK_INIT;
+        _Wait_context _Wait_list_head = {nullptr, &_Wait_list_head, &_Wait_list_head, CONDITION_VARIABLE_INIT};
+
+        constexpr _Wait_table_entry() noexcept = default;
+    };
+#pragma warning(pop)
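The _Guarded_wait_context above is an RAII node in a circular, intrusive, doubly-linked list whose sentinel head points at itself when empty; construction splices the node in, destruction splices it out, both in O(1). A standalone sketch of that discipline with illustrative names:

#include <cassert>

struct Node {
    Node* next;
    Node* prev;
};

struct LinkGuard : Node {
    explicit LinkGuard(Node& head) noexcept : Node{&head, head.prev} {
        prev->next = this; // splice in just before the sentinel
        next->prev = this;
    }
    ~LinkGuard() {
        next->prev = prev; // splice out; no traversal needed
        prev->next = next;
    }
};

int main() {
    Node head{&head, &head}; // empty list: sentinel points at itself
    {
        LinkGuard g{head};
        assert(head.next == &g && head.prev == &g);
    }
    assert(head.next == &head && head.prev == &head); // empty again
}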
+
+    [[nodiscard]] _Wait_table_entry& _Atomic_wait_table_entry(const void* const _Storage) noexcept {
+        static _Wait_table_entry wait_table[_Wait_table_size];
+        auto index = reinterpret_cast<_STD uintptr_t>(_Storage);
+        index ^= index >> (_Wait_table_size_power * 2);
+        index ^= index >> _Wait_table_size_power;
+        return wait_table[index & _Wait_table_index_mask];
+    }
+
+    void _Assume_timeout() noexcept {
+#ifdef _DEBUG
+        if (GetLastError() != ERROR_TIMEOUT) {
+            _CSTD abort();
+        }
+#endif // _DEBUG
+    }
+
+#if _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE
+
+#define __crtWaitOnAddress WaitOnAddress
+#define __crtWakeByAddressSingle WakeByAddressSingle
+#define __crtWakeByAddressAll WakeByAddressAll
+
+#else // ^^^ _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE / !_ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE vvv
+
+
+    struct _Wait_functions_table {
+        _STD atomic<decltype(&::WaitOnAddress)> _Pfn_WaitOnAddress{nullptr};
+        _STD atomic<decltype(&::WakeByAddressSingle)> _Pfn_WakeByAddressSingle{nullptr};
+        _STD atomic<decltype(&::WakeByAddressAll)> _Pfn_WakeByAddressAll{nullptr};
+        _STD atomic<__std_atomic_api_level> _Api_level{__std_atomic_api_level::__not_set};
+    };
+
+    _Wait_functions_table _Wait_functions;
+
+    void _Force_wait_functions_srwlock_only() noexcept {
+        auto _Local = _Wait_functions._Api_level.load(_STD memory_order_acquire);
+        if (_Local <= __std_atomic_api_level::__detecting) {
+            while (!_Wait_functions._Api_level.compare_exchange_weak(_Local, __std_atomic_api_level::__has_srwlock)) {
+                if (_Local > __std_atomic_api_level::__detecting) {
+                    return;
+                }
+            }
+        }
+    }
+
+    [[nodiscard]] __std_atomic_api_level _Init_wait_functions(__std_atomic_api_level _Level) {
+        while (!_Wait_functions._Api_level.compare_exchange_weak(_Level, __std_atomic_api_level::__detecting)) {
+            if (_Level > __std_atomic_api_level::__detecting) {
+                return _Level;
+            }
+        }
+
+        _Level = __std_atomic_api_level::__has_srwlock;
+
+        const HMODULE _Sync_module = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
+        if (_Sync_module != nullptr) {
+            const auto _Wait_on_address =
+                reinterpret_cast<decltype(&::WaitOnAddress)>(GetProcAddress(_Sync_module, "WaitOnAddress"));
+            const auto _Wake_by_address_single =
+                reinterpret_cast<decltype(&::WakeByAddressSingle)>(GetProcAddress(_Sync_module, "WakeByAddressSingle"));
+            const auto _Wake_by_address_all =
+                reinterpret_cast<decltype(&::WakeByAddressAll)>(GetProcAddress(_Sync_module, "WakeByAddressAll"));
+
+            if (_Wait_on_address != nullptr && _Wake_by_address_single != nullptr && _Wake_by_address_all != nullptr) {
+                _Wait_functions._Pfn_WaitOnAddress.store(_Wait_on_address, _STD memory_order_relaxed);
+                _Wait_functions._Pfn_WakeByAddressSingle.store(_Wake_by_address_single, _STD memory_order_relaxed);
+                _Wait_functions._Pfn_WakeByAddressAll.store(_Wake_by_address_all, _STD memory_order_relaxed);
+                _Level = __std_atomic_api_level::__has_wait_on_address;
+            }
+        }
+
+        // for __has_srwlock, relaxed would have been enough, not distinguishing for consistency
+        _Wait_functions._Api_level.store(_Level, _STD memory_order_release);
+        return _Level;
+    }
+
+    [[nodiscard]] __std_atomic_api_level _Acquire_wait_functions() noexcept {
+        auto _Level = _Wait_functions._Api_level.load(_STD memory_order_acquire);
+        if (_Level <= __std_atomic_api_level::__detecting) {
+            _Level = _Init_wait_functions(_Level);
+        }
+
+        return _Level;
+    }
+
+    [[nodiscard]] BOOL __crtWaitOnAddress(
+        volatile VOID* Address, PVOID CompareAddress, SIZE_T AddressSize, DWORD dwMilliseconds) {
+        const auto _Wait_on_address = _Wait_functions._Pfn_WaitOnAddress.load(_STD memory_order_relaxed);
+        return _Wait_on_address(Address, CompareAddress, AddressSize, dwMilliseconds);
+    }
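The detection code above is a one-time, many-thread probe: the api level is published with a release store and consumed with an acquire load, and racing probes are benign because every racer would store identical values. A condensed sketch of the same pattern (the probe result is hard-coded where the real code calls GetProcAddress):

#include <atomic>

enum class level { not_set, detecting, srwlock_only, wait_on_address };

std::atomic<level> g_level{level::not_set};

level acquire_level() noexcept {
    level cur = g_level.load(std::memory_order_acquire);
    if (cur > level::detecting) {
        return cur; // fast path: already detected
    }
    // claim the right to probe; a loser either observes the published result
    // or probes redundantly, which is harmless because the stores are idempotent
    while (!g_level.compare_exchange_weak(cur, level::detecting)) {
        if (cur > level::detecting) {
            return cur;
        }
    }
    const level detected = level::wait_on_address; // stand-in for the real probe
    g_level.store(detected, std::memory_order_release);
    return detected;
}

int main() {
    return acquire_level() == level::wait_on_address ? 0 : 1;
}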
+
+    VOID __crtWakeByAddressSingle(PVOID Address) {
+        const auto _Wake_by_address_single = _Wait_functions._Pfn_WakeByAddressSingle.load(_STD memory_order_relaxed);
+        _Wake_by_address_single(Address);
+    }
+
+    VOID __crtWakeByAddressAll(PVOID Address) {
+        const auto _Wake_by_address_all = _Wait_functions._Pfn_WakeByAddressAll.load(_STD memory_order_relaxed);
+        _Wake_by_address_all(Address);
+    }
+
+    bool __stdcall _Atomic_wait_are_equal_direct_fallback(
+        const void* _Storage, void* _Comparand, size_t _Size, void*) noexcept {
+        switch (_Size) {
+        case 1:
+            return __iso_volatile_load8(static_cast<const char*>(_Storage)) == *static_cast<const char*>(_Comparand);
+        case 2:
+            return __iso_volatile_load16(static_cast<const short*>(_Storage)) == *static_cast<const short*>(_Comparand);
+        case 4:
+            return __iso_volatile_load32(static_cast<const int*>(_Storage)) == *static_cast<const int*>(_Comparand);
+        case 8:
+            return __iso_volatile_load64(static_cast<const long long*>(_Storage))
+                == *static_cast<const long long*>(_Comparand);
+        default:
+            _CSTD abort();
+        }
+    }
+#endif // _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE
+} // unnamed namespace
+
+
+_EXTERN_C
+int __stdcall __std_atomic_wait_direct(const void* const _Storage, void* const _Comparand, const size_t _Size,
+    const unsigned long _Remaining_timeout) noexcept {
+#if _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE == 0
+    if (_Acquire_wait_functions() < __std_atomic_api_level::__has_wait_on_address) {
+        return __std_atomic_wait_indirect(
+            _Storage, _Comparand, _Size, nullptr, &_Atomic_wait_are_equal_direct_fallback, _Remaining_timeout);
+    }
+#endif // _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE == 0
+
+    const auto _Result = __crtWaitOnAddress(
+        const_cast<volatile void*>(_Storage), const_cast<void*>(_Comparand), _Size, _Remaining_timeout);
+
+    if (!_Result) {
+        _Assume_timeout();
+    }
+    return _Result;
+}
+
+void __stdcall __std_atomic_notify_one_direct(const void* const _Storage) noexcept {
+#if _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE == 0
+    if (_Acquire_wait_functions() < __std_atomic_api_level::__has_wait_on_address) {
+        __std_atomic_notify_one_indirect(_Storage);
+        return;
+    }
+#endif // _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE == 0
+
+    __crtWakeByAddressSingle(const_cast<void*>(_Storage));
+}
+
+void __stdcall __std_atomic_notify_all_direct(const void* const _Storage) noexcept {
+#if _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE == 0
+    if (_Acquire_wait_functions() < __std_atomic_api_level::__has_wait_on_address) {
+        __std_atomic_notify_all_indirect(_Storage);
+        return;
+    }
+#endif // _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE == 0
+
+    __crtWakeByAddressAll(const_cast<void*>(_Storage));
+}
+
+void __stdcall __std_atomic_notify_one_indirect(const void* const _Storage) noexcept {
+    auto& _Entry = _Atomic_wait_table_entry(_Storage);
+    _SrwLock_guard _Guard(_Entry._Lock);
+    _Wait_context* _Context = _Entry._Wait_list_head._Next;
+    for (; _Context != &_Entry._Wait_list_head; _Context = _Context->_Next) {
+        if (_Context->_Storage == _Storage) {
+            // Can't move wake outside SRWLOCKed section: SRWLOCK also protects the _Context itself
+            WakeAllConditionVariable(&_Context->_Condition);
+            break;
+        }
+    }
+}
+
+void __stdcall __std_atomic_notify_all_indirect(const void* const _Storage) noexcept {
+    auto& _Entry = _Atomic_wait_table_entry(_Storage);
+    _SrwLock_guard _Guard(_Entry._Lock);
+    _Wait_context* _Context = _Entry._Wait_list_head._Next;
+    for (; _Context != &_Entry._Wait_list_head; _Context = _Context->_Next) {
+        if (_Context->_Storage == _Storage) {
+            // Can't move wake outside SRWLOCKed section: SRWLOCK also protects the _Context itself
+            WakeAllConditionVariable(&_Context->_Condition);
+        }
+    }
+}
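The function below hinges on its "under lock to prevent lost wakes" comment: the waiter must test the value and go to sleep atomically with respect to the notifier's store. The same protocol expressed with portable std primitives (illustrative only; the DLL uses SRWLOCK and CONDITION_VARIABLE directly):

#include <condition_variable>
#include <mutex>
#include <thread>

std::mutex m;
std::condition_variable cv;
int value = 0;

void wait_until_changed(int old_value) {
    std::unique_lock<std::mutex> lock{m};
    // test-and-block happen under m, so a notify cannot slip in between them
    cv.wait(lock, [&] { return value != old_value; });
}

void change_and_notify(int new_value) {
    {
        std::lock_guard<std::mutex> lock{m};
        value = new_value; // publish under the same lock the waiter tests under
    }
    cv.notify_all();
}

int main() {
    std::thread t{[] { wait_until_changed(0); }};
    change_and_notify(1);
    t.join();
}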
+
+int __stdcall __std_atomic_wait_indirect(const void* _Storage, void* _Comparand, size_t _Size, void* _Param,
+    _Atomic_wait_indirect_equal_callback_t _Are_equal, unsigned long _Remaining_timeout) noexcept {
+    auto& _Entry = _Atomic_wait_table_entry(_Storage);
+
+    _SrwLock_guard _Guard(_Entry._Lock);
+    _Guarded_wait_context _Context{_Storage, &_Entry._Wait_list_head};
+    for (;;) {
+        if (!_Are_equal(_Storage, _Comparand, _Size, _Param)) { // note: under lock to prevent lost wakes
+            return TRUE;
+        }
+
+        if (!SleepConditionVariableSRW(&_Context._Condition, &_Entry._Lock, _Remaining_timeout, 0)) {
+            _Assume_timeout();
+            return FALSE;
+        }
+
+        if (_Remaining_timeout != _Atomic_wait_no_timeout) {
+            // spurious wake to recheck the clock
+            return TRUE;
+        }
+    }
+}
+
+unsigned long long __stdcall __std_atomic_wait_get_deadline(const unsigned long long _Timeout) noexcept {
+    if (_Timeout == _Atomic_wait_no_deadline) {
+        return _Atomic_wait_no_deadline;
+    } else {
+        return GetTickCount64() + _Timeout;
+    }
+}
+
+unsigned long __stdcall __std_atomic_wait_get_remaining_timeout(unsigned long long _Deadline) noexcept {
+    static_assert(_Atomic_wait_no_timeout == INFINITE,
+        "_Atomic_wait_no_timeout is passed directly to underlying API, so should match it");
+
+    if (_Deadline == _Atomic_wait_no_deadline) {
+        return INFINITE;
+    }
+
+    const unsigned long long _Current_time = GetTickCount64();
+    if (_Current_time >= _Deadline) {
+        return 0;
+    }
+
+    unsigned long long _Remaining = _Deadline - _Current_time;
+    constexpr unsigned long _Ten_days = 864'000'000;
+    if (_Remaining > _Ten_days) {
+        return _Ten_days;
+    }
+    return static_cast<unsigned long>(_Remaining);
+}
+
+__std_atomic_api_level __stdcall __std_atomic_set_api_level(__std_atomic_api_level _Requested_api_level) noexcept {
+#if _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE
+    (void) _Requested_api_level;
+    return __std_atomic_api_level::__has_wait_on_address;
+#else // ^^^ _ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE / !_ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE vvv
+    switch (_Requested_api_level) {
+    case __std_atomic_api_level::__not_set:
+    case __std_atomic_api_level::__detecting:
+        _CSTD abort();
+    case __std_atomic_api_level::__has_srwlock:
+        _Force_wait_functions_srwlock_only();
+        break;
+    case __std_atomic_api_level::__has_wait_on_address:
+    default: // future compat: new header using an old DLL will get the highest requested level supported
+        break;
+    }
+
+    return _Acquire_wait_functions();
+#endif // !_ATOMIC_WAIT_ON_ADDRESS_STATICALLY_AVAILABLE
+}
+_END_EXTERN_C
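The deadline/remaining-timeout pair above is built for re-derivation after every wake: compute an absolute deadline once, then convert back to a 32-bit relative timeout before each underlying wait, clamping to ten days so the value always fits a DWORD. A small sketch of that arithmetic (same constants; the main harness is mine):

#include <cstdio>
#include <windows.h>

using ull = unsigned long long;
constexpr ull no_deadline = ~0ull;

ull deadline_from(ull timeout_ms) { // mirrors __std_atomic_wait_get_deadline
    return timeout_ms == no_deadline ? no_deadline : GetTickCount64() + timeout_ms;
}

unsigned long remaining_from(ull deadline) { // mirrors __std_atomic_wait_get_remaining_timeout
    if (deadline == no_deadline) {
        return INFINITE;
    }
    const ull now = GetTickCount64();
    if (now >= deadline) {
        return 0; // deadline already passed
    }
    const ull rem = deadline - now;
    constexpr unsigned long ten_days = 864'000'000; // clamp; the caller simply waits again
    return rem > ten_days ? ten_days : static_cast<unsigned long>(rem);
}

int main() {
    const ull d = deadline_from(1500);
    std::printf("remaining ~%lu ms\n", remaining_from(d)); // ~1500
}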
diff --git a/stl/src/msvcp_atomic_wait.src b/stl/src/msvcp_atomic_wait.src
new file mode 100644
index 0000000000..ec335cc161
--- /dev/null
+++ b/stl/src/msvcp_atomic_wait.src
@@ -0,0 +1,25 @@
+; Copyright (c) Microsoft Corporation.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+; atomic wait satellite DLL definition
+
+LIBRARY LIBRARYNAME
+
+EXPORTS
+    __std_atomic_wait_get_deadline
+    __std_atomic_wait_get_remaining_timeout
+    __std_atomic_notify_all_direct
+    __std_atomic_notify_all_indirect
+    __std_atomic_notify_one_direct
+    __std_atomic_notify_one_indirect
+    __std_atomic_set_api_level
+    __std_atomic_wait_direct
+    __std_atomic_wait_indirect
+    __std_bulk_submit_threadpool_work
+    __std_close_threadpool_work
+    __std_create_threadpool_work
+    __std_execution_wait_on_uchar
+    __std_execution_wake_by_address_all
+    __std_parallel_algorithms_hw_threads
+    __std_submit_threadpool_work
+    __std_wait_for_threadpool_work_callbacks
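These exports are flat extern "C" entry points, so code built against the import library consumes them through plain declarations. A sketch of what such declarations look like, derived from the definitions in atomic_wait.cpp above (the STL's real declarations live in the new xatomic_wait.h header, whose contents are not shown in this section):

#include <cstddef>

extern "C" {
int __stdcall __std_atomic_wait_direct(
    const void* _Storage, void* _Comparand, size_t _Size, unsigned long _Remaining_timeout) noexcept;
void __stdcall __std_atomic_notify_one_direct(const void* _Storage) noexcept;
void __stdcall __std_atomic_notify_all_direct(const void* _Storage) noexcept;
unsigned long long __stdcall __std_atomic_wait_get_deadline(unsigned long long _Timeout) noexcept;
unsigned long __stdcall __std_atomic_wait_get_remaining_timeout(unsigned long long _Deadline) noexcept;
}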
diff --git a/stl/src/parallel_algorithms.cpp b/stl/src/parallel_algorithms.cpp
index c59577b7d5..b661116d31
--- a/stl/src/parallel_algorithms.cpp
+++ b/stl/src/parallel_algorithms.cpp
@@ -3,297 +3,60 @@
 // support for <execution>

-#include
 #include
-#include
+#include
+#include

-// This must be as small as possible, because its contents are
-// injected into the msvcprt.lib and msvcprtd.lib import libraries.
-// Do not include or define anything else here.
-// In particular, basic_string must not be included here.
-
-#if _STL_WIN32_WINNT >= _WIN32_WINNT_WIN8
-#pragma comment(lib, "synchronization") // for WaitOnAddress family
-#endif // _STL_WIN32_WINNT >= _WIN32_WINNT_WIN8
-
-#if _STL_WIN32_WINNT < _WIN32_WINNT_WIN8
 namespace {
-    struct _Parallel_init_info {
-        unsigned int _Hw_threads;
-#if _STL_WIN32_WINNT < _WIN32_WINNT_VISTA
-        decltype(CreateThreadpoolWork)* _Pfn_CreateThreadpoolWork;
-        decltype(SubmitThreadpoolWork)* _Pfn_SubmitThreadpoolWork;
-        decltype(CloseThreadpoolWork)* _Pfn_CloseThreadpoolWork;
-        decltype(WaitForThreadpoolWorkCallbacks)* _Pfn_WaitForThreadpoolWorkCallbacks;
-        decltype(AcquireSRWLockExclusive)* _Pfn_AcquireSRWLockExclusive; // nullptr if _Pfn_WaitOnAddress is non-nullptr
-        decltype(ReleaseSRWLockExclusive)* _Pfn_ReleaseSRWLockExclusive; // ditto
-        decltype(SleepConditionVariableSRW)* _Pfn_SleepConditionVariableSRW; // ditto
-        decltype(WakeAllConditionVariable)* _Pfn_WakeAllConditionVariable; // ditto
-#endif // _STL_WIN32_WINNT < _WIN32_WINNT_VISTA
-        decltype(WaitOnAddress)* _Pfn_WaitOnAddress;
-        decltype(WakeByAddressAll)* _Pfn_WakeByAddressAll;
-    };
-
-    _Parallel_init_info _Parallel_info;
-
-    struct _Wait_semaphore {
-        SRWLOCK _Mtx;
-        CONDITION_VARIABLE _Cv;
-    };
-
-    constexpr int _Wait_table_size = 256; // one 4k page
-    constexpr int _Wait_table_max_index = _Wait_table_size - 1;
-    _Wait_semaphore _Wait_table[_Wait_table_size]{};
-    size_t _Choose_wait_entry(const volatile void* _Target) noexcept {
-        auto _Num = reinterpret_cast<uintptr_t>(_Target);
-#ifdef _WIN64
-        _Num = (_Num & ((1ull << 32) - 1ull)) ^ (_Num >> 32); // down to 32 bits
-#endif // _WIN64
-        _Num = (_Num & ((1u << 16) - 1u)) ^ (_Num >> 16); // to 16 bits
-        _Num = (_Num & ((1u << 8) - 1u)) ^ (_Num >> 8); // to 8 bits
-        static_assert(_Wait_table_max_index == (1 << 8) - 1, "Bad wait table size assumption");
-        return _Num;
-    }
-
     unsigned char _Atomic_load_uchar(const volatile unsigned char* _Ptr) noexcept {
         // atomic load of unsigned char, copied from <atomic> except ARM and ARM64 bits
         unsigned char _Value;
-#if defined(_M_IX86) || defined(_M_X64)
-        _Value = *_Ptr;
+#if defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64)
+        _Value = __iso_volatile_load8(reinterpret_cast<const volatile char*>(_Ptr));
         _ReadWriteBarrier();
-#else // architecture, no ARM support as this is guarded by _STL_WIN32_WINNT < _WIN32_WINNT_WIN8
+#else
 #error Unsupported architecture
-#endif // architecture
+#endif
         return _Value;
     }
-
-    unsigned int _Atomic_load_uint(const volatile unsigned int* _Ptr) noexcept {
-        // atomic load of unsigned int, copied from <atomic> except ARM and ARM64 bits
-        unsigned int _Value;
-#if defined(_M_IX86) || defined(_M_X64)
-        _Value = *_Ptr;
-        _ReadWriteBarrier();
-#else // architecture, ditto no ARM support
-#error Unsupported architecture
-#endif // architecture
-        return _Value;
-    }
-
-    void _Atomic_store_uint(volatile unsigned int* _Tgt, unsigned int _Value) {
-        // atomic store of unsigned int, copied from <atomic>
-#if defined(_M_IX86) || defined(_M_X64)
-        _InterlockedExchange(reinterpret_cast<volatile long*>(_Tgt), static_cast<long>(_Value));
-#else // architecture, ditto no ARM support
-#error Unsupported architecture
-#endif // architecture
-    }
-
-    bool _Initialize_parallel_init_info() { // try to fill in _Parallel_info
-#if !(defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64))
-#error Check hardware assumption: Assumes that write races of identical values to pointer-sized variables are benign
-#endif // !(defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM) || defined(_M_ARM64))
-
-        HMODULE _Kernel32 = GetModuleHandleW(L"kernel32.dll");
-#if _STL_WIN32_WINNT < _WIN32_WINNT_VISTA
-        _Parallel_info._Pfn_CreateThreadpoolWork =
-            reinterpret_cast<decltype(CreateThreadpoolWork)*>(GetProcAddress(_Kernel32, "CreateThreadpoolWork"));
-        _Parallel_info._Pfn_SubmitThreadpoolWork =
-            reinterpret_cast<decltype(SubmitThreadpoolWork)*>(GetProcAddress(_Kernel32, "SubmitThreadpoolWork"));
-        _Parallel_info._Pfn_CloseThreadpoolWork =
-            reinterpret_cast<decltype(CloseThreadpoolWork)*>(GetProcAddress(_Kernel32, "CloseThreadpoolWork"));
-        _Parallel_info._Pfn_WaitForThreadpoolWorkCallbacks =
-            reinterpret_cast<decltype(WaitForThreadpoolWorkCallbacks)*>(
-                GetProcAddress(_Kernel32, "WaitForThreadpoolWorkCallbacks"));
-        if (!_Parallel_info._Pfn_CreateThreadpoolWork || !_Parallel_info._Pfn_SubmitThreadpoolWork
-            || !_Parallel_info._Pfn_CloseThreadpoolWork || !_Parallel_info._Pfn_WaitForThreadpoolWorkCallbacks) {
-            // don't parallelize without the Windows Vista threadpool
-            return false;
-        }
-#endif // _STL_WIN32_WINNT < _WIN32_WINNT_VISTA
-
-        HMODULE _KernelBase = GetModuleHandleW(L"kernelbase.dll");
-        if (_KernelBase) {
-            _Parallel_info._Pfn_WaitOnAddress =
-                reinterpret_cast<decltype(WaitOnAddress)*>(GetProcAddress(_KernelBase, "WaitOnAddress"));
-            _Parallel_info._Pfn_WakeByAddressAll =
-                reinterpret_cast<decltype(WakeByAddressAll)*>(GetProcAddress(_KernelBase, "WakeByAddressAll"));
-            if ((_Parallel_info._Pfn_WaitOnAddress == nullptr) != (_Parallel_info._Pfn_WakeByAddressAll == nullptr)) {
-                // if we don't have both we can use neither
-                _Parallel_info._Pfn_WaitOnAddress = nullptr;
-                _Parallel_info._Pfn_WakeByAddressAll = nullptr;
-            }
-        }
-
-#if _STL_WIN32_WINNT < _WIN32_WINNT_VISTA
-        if (_Parallel_info._Pfn_WaitOnAddress) { // no need for SRWLOCK or CONDITION_VARIABLE if we have WaitOnAddress
-            return true;
-        }
-
-        _Parallel_info._Pfn_AcquireSRWLockExclusive =
-            reinterpret_cast<decltype(AcquireSRWLockExclusive)*>(GetProcAddress(_Kernel32, "AcquireSRWLockExclusive"));
-        _Parallel_info._Pfn_ReleaseSRWLockExclusive =
-            reinterpret_cast<decltype(ReleaseSRWLockExclusive)*>(GetProcAddress(_Kernel32, "ReleaseSRWLockExclusive"));
-        _Parallel_info._Pfn_SleepConditionVariableSRW = reinterpret_cast<decltype(SleepConditionVariableSRW)*>(
-            GetProcAddress(_Kernel32, "SleepConditionVariableSRW"));
-        _Parallel_info._Pfn_WakeAllConditionVariable = reinterpret_cast<decltype(WakeAllConditionVariable)*>(
-            GetProcAddress(_Kernel32, "WakeAllConditionVariable"));
-
-        if (!_Parallel_info._Pfn_AcquireSRWLockExclusive || !_Parallel_info._Pfn_ReleaseSRWLockExclusive
-            || !_Parallel_info._Pfn_SleepConditionVariableSRW || !_Parallel_info._Pfn_WakeAllConditionVariable) {
-            // no fallback for WaitOnAddress; shouldn't be possible as these
-            // APIs were added at the same time as the Windows Vista threadpool API
-            return false;
-        }
-#endif // _STL_WIN32_WINNT < _WIN32_WINNT_VISTA
-
-        return true;
-    }
 } // unnamed namespace
-#endif // _STL_WIN32_WINNT < _WIN32_WINNT_WIN8
-
-static DWORD _Get_number_of_processors() noexcept {
-    SYSTEM_INFO _Info;
-    GetNativeSystemInfo(&_Info);
-    return _Info.dwNumberOfProcessors;
-}

 extern "C" {

+// TRANSITION, ABI
 _NODISCARD unsigned int __stdcall __std_parallel_algorithms_hw_threads() noexcept {
-#if _STL_WIN32_WINNT >= _WIN32_WINNT_WIN8
-    return _Get_number_of_processors();
-#else // ^^^ _STL_WIN32_WINNT >= _WIN32_WINNT_WIN8 ^^^ / vvv _STL_WIN32_WINNT < _WIN32_WINNT_WIN8 vvv
-    // _Atomic_load_uint enforces memory ordering in _Initialize_parallel_init_info:
-    unsigned int _Result = _Atomic_load_uint(&_Parallel_info._Hw_threads);
-    if (_Result == 0) {
-        if (_Initialize_parallel_init_info()) {
-            _Result = _Get_number_of_processors();
-        } else {
-            _Result = 1;
-        }
-
-        // _Atomic_store_uint enforces memory ordering in _Initialize_parallel_init_info:
-        _Atomic_store_uint(&_Parallel_info._Hw_threads, _Result);
-    }
-
-    return _Result;
-#endif // ^^^ _STL_WIN32_WINNT < _WIN32_WINNT_WIN8 ^^^
+    return _STD thread::hardware_concurrency();
 }

-// Relaxed reads of _Parallel_info below because __std_parallel_algorithms_hw_threads must be called
-// before any of these on each thread.
-
 _NODISCARD PTP_WORK __stdcall __std_create_threadpool_work(
     PTP_WORK_CALLBACK _Callback, void* _Context, PTP_CALLBACK_ENVIRON _Callback_environ) noexcept {
-#if _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA
     return CreateThreadpoolWork(_Callback, _Context, _Callback_environ);
-#else // ^^^ _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA ^^^ / vvv _STL_WIN32_WINNT < _WIN32_WINNT_VISTA vvv
-    return _Parallel_info._Pfn_CreateThreadpoolWork(_Callback, _Context, _Callback_environ);
-#endif // ^^^ _STL_WIN32_WINNT < _WIN32_WINNT_VISTA ^^^
 }

 void __stdcall __std_submit_threadpool_work(PTP_WORK _Work) noexcept {
-#if _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA
     SubmitThreadpoolWork(_Work);
-#else // ^^^ _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA ^^^ / vvv _STL_WIN32_WINNT < _WIN32_WINNT_VISTA vvv
-    _Parallel_info._Pfn_SubmitThreadpoolWork(_Work);
-#endif // ^^^ _STL_WIN32_WINNT < _WIN32_WINNT_VISTA ^^^
 }

 void __stdcall __std_bulk_submit_threadpool_work(PTP_WORK _Work, const size_t _Submissions) noexcept {
-#if _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA
     for (size_t _Idx = 0; _Idx < _Submissions; ++_Idx) {
         SubmitThreadpoolWork(_Work);
     }
-#else // ^^^ _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA ^^^ / vvv _STL_WIN32_WINNT < _WIN32_WINNT_VISTA vvv
-    const auto _Fn = _Parallel_info._Pfn_SubmitThreadpoolWork;
-    for (size_t _Idx = 0; _Idx < _Submissions; ++_Idx) {
-        _Fn(_Work);
-    }
-#endif // ^^^ _STL_WIN32_WINNT < _WIN32_WINNT_VISTA ^^^
 }

 void __stdcall __std_close_threadpool_work(PTP_WORK _Work) noexcept {
-#if _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA
     CloseThreadpoolWork(_Work);
-#else // ^^^ _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA ^^^ / vvv _STL_WIN32_WINNT < _WIN32_WINNT_VISTA vvv
-    _Parallel_info._Pfn_CloseThreadpoolWork(_Work);
-#endif // ^^^ _STL_WIN32_WINNT < _WIN32_WINNT_VISTA ^^^
 }

 void __stdcall __std_wait_for_threadpool_work_callbacks(PTP_WORK _Work, BOOL _Cancel) noexcept {
-#if _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA
     WaitForThreadpoolWorkCallbacks(_Work, _Cancel);
-#else // ^^^ _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA ^^^ / vvv _STL_WIN32_WINNT < _WIN32_WINNT_VISTA vvv
-    _Parallel_info._Pfn_WaitForThreadpoolWorkCallbacks(_Work, _Cancel);
-#endif // ^^^ _STL_WIN32_WINNT < _WIN32_WINNT_VISTA ^^^
 }

 void __stdcall __std_execution_wait_on_uchar(const volatile unsigned char* _Address, unsigned char _Compare) noexcept {
-#if _STL_WIN32_WINNT >= _WIN32_WINNT_WIN8
-    if (WaitOnAddress(const_cast<volatile unsigned char*>(_Address), &_Compare, 1, INFINITE) == FALSE) {
-        // this API failing should only be possible with a timeout, and we asked for INFINITE
-        ::terminate();
-    }
-#else // ^^^ _STL_WIN32_WINNT >= _WIN32_WINNT_WIN8 ^^^ / vvv _STL_WIN32_WINNT < _WIN32_WINNT_WIN8 vvv
-    if (_Parallel_info._Pfn_WaitOnAddress) {
-        if (_Parallel_info._Pfn_WaitOnAddress(const_cast<volatile unsigned char*>(_Address), &_Compare, 1, INFINITE)
-            == FALSE) {
-            ::terminate();
-        }
-
-        return;
-    }
-
-    // fake WaitOnAddress via SRWLOCK and CONDITION_VARIABLE
-    for (int _Idx = 0; _Idx < 4096; ++_Idx) { // optimistic non-backoff spin
-        if (_Atomic_load_uchar(_Address) == _Compare) {
-            return;
-        }
-    }
-
-    auto& _Wait_entry = _Wait_table[_Choose_wait_entry(_Address)];
-#if _STL_WIN32_WINNT < _WIN32_WINNT_VISTA
-    _Parallel_info._Pfn_AcquireSRWLockExclusive(&_Wait_entry._Mtx);
-    while (_Atomic_load_uchar(_Address) == _Compare) {
-        if (_Parallel_info._Pfn_SleepConditionVariableSRW(&_Wait_entry._Cv, &_Wait_entry._Mtx, INFINITE, 0) == 0) {
-            ::terminate();
-        }
-    }
-
-    _Parallel_info._Pfn_ReleaseSRWLockExclusive(&_Wait_entry._Mtx);
-#else // ^^^ _STL_WIN32_WINNT < _WIN32_WINNT_VISTA ^^^ / vvv _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA vvv
-    AcquireSRWLockExclusive(&_Wait_entry._Mtx);
-    while (_Atomic_load_uchar(_Address) == _Compare) {
-        if (SleepConditionVariableSRW(&_Wait_entry._Cv, &_Wait_entry._Mtx, INFINITE, 0) == 0) {
-            ::terminate();
-        }
-    }
-
-    ReleaseSRWLockExclusive(&_Wait_entry._Mtx);
-#endif // ^^^ _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA ^^^
-#endif // ^^^ _STL_WIN32_WINNT < _WIN32_WINNT_WIN8 ^^^
+    __std_atomic_wait_direct(const_cast<const unsigned char*>(_Address), &_Compare, 1, _Atomic_wait_no_timeout);
 }

 void __stdcall __std_execution_wake_by_address_all(const volatile void* _Address) noexcept {
-#if _STL_WIN32_WINNT >= _WIN32_WINNT_WIN8
-    WakeByAddressAll(const_cast<void*>(_Address));
-#else // ^^^ _STL_WIN32_WINNT >= _WIN32_WINNT_WIN8 ^^^ / vvv _STL_WIN32_WINNT < _WIN32_WINNT_WIN8 vvv
-    if (_Parallel_info._Pfn_WakeByAddressAll) {
-        _Parallel_info._Pfn_WakeByAddressAll(const_cast<void*>(_Address));
-    } else {
-        auto& _Wait_entry = _Wait_table[_Choose_wait_entry(_Address)];
-#if _STL_WIN32_WINNT < _WIN32_WINNT_VISTA
-        _Parallel_info._Pfn_AcquireSRWLockExclusive(&_Wait_entry._Mtx);
-        _Parallel_info._Pfn_ReleaseSRWLockExclusive(&_Wait_entry._Mtx);
-        _Parallel_info._Pfn_WakeAllConditionVariable(&_Wait_entry._Cv);
-#else // ^^^ _STL_WIN32_WINNT < _WIN32_WINNT_VISTA ^^^ / vvv _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA vvv
-        AcquireSRWLockExclusive(&_Wait_entry._Mtx);
-        ReleaseSRWLockExclusive(&_Wait_entry._Mtx);
-        WakeAllConditionVariable(&_Wait_entry._Cv);
-#endif // ^^^ _STL_WIN32_WINNT >= _WIN32_WINNT_VISTA ^^^
-    }
-#endif // ^^^ _STL_WIN32_WINNT < _WIN32_WINNT_WIN8 ^^^
+    __std_atomic_notify_all_direct(const_cast<const void*>(_Address));
 }

 } // extern "C"
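After this rewrite the <execution> helpers funnel into the same machinery that backs C++20 std::atomic waits. The user-visible equivalent of __std_execution_wait_on_uchar / __std_execution_wake_by_address_all is simply (illustrative, C++20):

#include <atomic>
#include <thread>

std::atomic<unsigned char> stage{0};

int main() {
    std::thread worker{[] {
        stage.store(1, std::memory_order_release);
        stage.notify_all(); // wakes every thread blocked in stage.wait(0)
    }};
    stage.wait(0); // blocks only while the stored value still compares equal to 0
    worker.join();
}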
diff --git a/tests/libcxx/expected_results.txt b/tests/libcxx/expected_results.txt
index 88a11df330..366bc6b380
--- a/tests/libcxx/expected_results.txt
+++ b/tests/libcxx/expected_results.txt
@@ -472,7 +472,6 @@ std/language.support/support.limits/support.limits.general/iterator.version.pass.cpp FAIL
 std/language.support/support.limits/support.limits.general/memory.version.pass.cpp FAIL

 # C++20 P1135R6 "The C++20 Synchronization Library"
-std/atomics/atomics.types.operations/atomics.types.operations.wait/atomic_wait.pass.cpp FAIL
 std/thread/thread.barrier/arrive.pass.cpp FAIL
 std/thread/thread.barrier/arrive_and_drop.pass.cpp FAIL
 std/thread/thread.barrier/arrive_and_wait.pass.cpp FAIL
diff --git a/tests/libcxx/skipped_tests.txt b/tests/libcxx/skipped_tests.txt
index 133b8b06e8..ca2297c46b
--- a/tests/libcxx/skipped_tests.txt
+++ b/tests/libcxx/skipped_tests.txt
@@ -472,7 +472,6 @@ language.support\support.limits\support.limits.general\iterator.version.pass.cpp
 language.support\support.limits\support.limits.general\memory.version.pass.cpp

 # C++20 P1135R6 "The C++20 Synchronization Library"
-atomics\atomics.types.operations\atomics.types.operations.wait\atomic_wait.pass.cpp
 thread\thread.barrier\arrive.pass.cpp
 thread\thread.barrier\arrive_and_drop.pass.cpp
 thread\thread.barrier\arrive_and_wait.pass.cpp
diff --git a/tests/std/include/test_atomic_wait.hpp b/tests/std/include/test_atomic_wait.hpp
new file mode 100644
index 0000000000..248615cdf4
--- /dev/null
+++ b/tests/std/include/test_atomic_wait.hpp
@@ -0,0 +1,203 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#pragma once
+
+#include <atomic>
+#include <cassert>
+#include <chrono>
+#include <cstring>
+#include <thread>
+
+template <class UnderlyingType>
+void test_atomic_wait_func(const UnderlyingType old_value, const UnderlyingType new_value,
+    const std::chrono::steady_clock::duration waiting_duration) {
+    constexpr int seq_max_size = 10;
+    char seq[seq_max_size + 1];
+    std::atomic<char*> base = seq;
+    auto add_seq = [&](char ch) {
+        char* p = base.fetch_add(1, std::memory_order_relaxed);
+        assert(p - seq < seq_max_size);
+        *p = ch;
+    };
+
+    std::atomic<UnderlyingType> a{old_value};
+    a.wait(new_value);
+
+    add_seq('1');
+
+    std::thread thd([&] {
+        std::this_thread::sleep_for(waiting_duration);
+        add_seq('2');
+        a.notify_all();
+        std::this_thread::sleep_for(waiting_duration);
+        add_seq('3');
+        a.store(old_value);
+        a.notify_one();
+        std::this_thread::sleep_for(waiting_duration);
+        add_seq('4');
+        a.store(new_value);
+        a.notify_one();
+        // timing assumption that the main thread evaluates the `wait(old_value)` before this timeout expires
+        std::this_thread::sleep_for(waiting_duration);
+        add_seq('6');
+    });
+
+    a.wait(old_value);
+    const auto loaded = a.load();
+    assert(memcmp(&loaded, &new_value, sizeof(UnderlyingType)) == 0);
+
+    add_seq('5');
+
+    thd.join();
+
+    add_seq('\0');
+    assert(strcmp(seq, "123456") == 0);
+}
+
+template <class UnderlyingType>
+void test_notify_all_notifies_all(const UnderlyingType old_value, const UnderlyingType new_value,
+    const std::chrono::steady_clock::duration waiting_duration) {
+    std::atomic<UnderlyingType> c{old_value};
+    const auto waitFn = [&c, old_value] { c.wait(old_value); };
+
+    std::thread w1{waitFn};
+    std::thread w2{waitFn};
+    std::thread w3{waitFn};
+
+    std::this_thread::sleep_for(waiting_duration);
+    c.store(new_value);
+    c.notify_all(); // if this doesn't really notify all, the following joins will deadlock
+
+    w1.join();
+    w2.join();
+    w3.join();
+}
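Note why the test types used here delete operator==: atomic waiting compares object representations, as if by memcmp, never user-defined equality — which is also why the function above asserts with memcmp rather than ==. A freestanding C++20 illustration:

#include <atomic>
#include <cassert>
#include <cstring>

struct pair16 { // trivially copyable, deliberately not equality-comparable
    short a;
    short b;
    friend bool operator==(pair16, pair16) = delete;
};

int main() {
    std::atomic<pair16> x{pair16{1, 1}};
    pair16 expected{1, 1};
    x.store(pair16{1, 2});
    x.wait(expected); // returns immediately: representations already differ
    pair16 loaded = x.load();
    assert(std::memcmp(&loaded, &expected, sizeof(pair16)) != 0);
}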
+
+template <class UnderlyingType>
+void test_pad_bits(const std::chrono::steady_clock::duration waiting_duration) {
+    UnderlyingType old_value;
+    memset(&old_value, 0x66, sizeof(UnderlyingType));
+    old_value.set(1);
+
+    UnderlyingType same_old_value;
+    memset(&same_old_value, 0x99, sizeof(UnderlyingType));
+    same_old_value.set(1);
+
+    std::atomic<UnderlyingType> c(old_value);
+
+    bool trigger = false;
+    const auto waitFn = [&c, same_old_value, &trigger] {
+        c.wait(same_old_value);
+        trigger = true;
+    };
+
+    std::thread w1{waitFn};
+
+    std::this_thread::sleep_for(waiting_duration);
+    assert(!trigger);
+
+    c.store(old_value);
+    c.notify_one();
+
+    std::this_thread::sleep_for(waiting_duration);
+    assert(!trigger);
+
+    UnderlyingType new_value;
+    memset(&new_value, 0x99, sizeof(UnderlyingType));
+    new_value.set(2);
+    c.store(new_value);
+    c.notify_one();
+
+    std::this_thread::sleep_for(waiting_duration);
+    assert(trigger);
+
+    w1.join();
+}
+
+struct two_shorts {
+    short a;
+    short b;
+
+    friend bool operator==(two_shorts, two_shorts) = delete;
+};
+
+struct three_chars {
+    char a;
+    char b;
+    char c;
+
+    friend bool operator==(three_chars, three_chars) = delete;
+};
+
+struct big_char_like {
+    char value;
+    char unused[16];
+
+    explicit big_char_like(char value_) : value(value_), unused{} {}
+
+    friend bool operator==(big_char_like, big_char_like) = delete;
+};
+
+template <size_t size>
+struct with_padding_bits {
+    alignas(size) char value;
+
+    void set(const char value_) {
+        value = value_;
+    }
+
+    friend bool operator==(with_padding_bits, with_padding_bits) = delete;
+};
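with_padding_bits is the interesting case: under alignas, the single value byte leaves trailing padding, so two objects that hold "the same value" can differ byte-for-byte — exactly the hazard test_pad_bits probes (a representation-based wait must not sleep forever on a comparand whose only difference is padding). A tiny demonstration of the underlying effect (illustrative):

#include <cstdio>
#include <cstring>

struct padded { // 1 value byte plus 1 padding byte under alignas(2)
    alignas(2) char value;
};

int main() {
    padded a, b;
    std::memset(&a, 0x66, sizeof(padded)); // scribble the padding first...
    std::memset(&b, 0x99, sizeof(padded));
    a.value = 1; // ...then set the same value in both
    b.value = 1;
    // same value, but the padding bytes (0x66 vs 0x99) make the representations differ
    std::printf("memcmp equal? %s\n", std::memcmp(&a, &b, sizeof(padded)) == 0 ? "yes" : "no");
}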
+
+inline void test_atomic_wait() {
+    // wait for all the threads to be waiting; if this value is too small the test might be ineffective but should not
+    // fail due to timing assumptions except where otherwise noted; if it is too large the test will only take longer
+    // than necessary
+    constexpr std::chrono::milliseconds waiting_duration{100};
+    test_atomic_wait_func<char>(1, 2, waiting_duration);
+    test_atomic_wait_func<signed char>(1, 2, waiting_duration);
+    test_atomic_wait_func<unsigned char>(1, 2, waiting_duration);
+    test_atomic_wait_func<short>(1, 2, waiting_duration);
+    test_atomic_wait_func<unsigned short>(1, 2, waiting_duration);
+    test_atomic_wait_func<int>(1, 2, waiting_duration);
+    test_atomic_wait_func<unsigned int>(1, 2, waiting_duration);
+    test_atomic_wait_func<long>(1, 2, waiting_duration);
+    test_atomic_wait_func<unsigned long>(1, 2, waiting_duration);
+    test_atomic_wait_func<long long>(1, 2, waiting_duration);
+    test_atomic_wait_func<unsigned long long>(1, 2, waiting_duration);
+    test_atomic_wait_func<float>(1, 2, waiting_duration);
+    test_atomic_wait_func<double>(1, 2, waiting_duration);
+    test_atomic_wait_func<long double>(1, 2, waiting_duration);
+    test_atomic_wait_func("1", "2", waiting_duration);
+    test_atomic_wait_func(two_shorts{1, 1}, two_shorts{1, 2}, waiting_duration);
+    test_atomic_wait_func(three_chars{1, 1, 3}, three_chars{1, 2, 3}, waiting_duration);
+    test_atomic_wait_func(big_char_like{'a'}, big_char_like{'b'}, waiting_duration);
+
+    test_notify_all_notifies_all<char>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<signed char>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<unsigned char>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<short>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<unsigned short>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<int>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<unsigned int>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<long>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<unsigned long>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<long long>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<unsigned long long>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<float>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<double>(1, 2, waiting_duration);
+    test_notify_all_notifies_all<long double>(1, 2, waiting_duration);
+    test_notify_all_notifies_all("1", "2", waiting_duration);
+    test_notify_all_notifies_all(two_shorts{1, 1}, two_shorts{1, 2}, waiting_duration);
+    test_notify_all_notifies_all(three_chars{1, 1, 3}, three_chars{1, 2, 3}, waiting_duration);
+    test_notify_all_notifies_all(big_char_like{'a'}, big_char_like{'b'}, waiting_duration);
+
+#ifndef __clang__ // TRANSITION, LLVM-46685
+    test_pad_bits<with_padding_bits<2>>(waiting_duration);
+    test_pad_bits<with_padding_bits<4>>(waiting_duration);
+    test_pad_bits<with_padding_bits<8>>(waiting_duration);
+    test_pad_bits<with_padding_bits<16>>(waiting_duration);
+    test_pad_bits<with_padding_bits<32>>(waiting_duration);
+#endif // __clang__, TRANSITION, LLVM-46685
+}
diff --git a/tests/std/test.lst b/tests/std/test.lst
index a8a70ccb76..f6eda1e96f
--- a/tests/std/test.lst
+++ b/tests/std/test.lst
@@ -311,6 +311,8 @@ tests\P0966R1_string_reserve_should_not_shrink
 tests\P1023R0_constexpr_for_array_comparisons
 tests\P1032R1_miscellaneous_constexpr
 tests\P1135R6_atomic_flag_test
+tests\P1135R6_atomic_wait
+tests\P1135R6_atomic_wait_vista
 tests\P1165R1_consistently_propagating_stateful_allocators
 tests\P1423R3_char8_t_remediation
 tests\P1645R1_constexpr_numeric
diff --git a/tests/std/tests/P1135R6_atomic_wait/env.lst b/tests/std/tests/P1135R6_atomic_wait/env.lst
new file mode 100644
index 0000000000..642f530ffa
--- /dev/null
+++ b/tests/std/tests/P1135R6_atomic_wait/env.lst
@@ -0,0 +1,4 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE ..\usual_latest_matrix.lst
diff --git a/tests/std/tests/P1135R6_atomic_wait/test.cpp b/tests/std/tests/P1135R6_atomic_wait/test.cpp
new file mode 100644
index 0000000000..7b7e0ea1fd
--- /dev/null
+++ b/tests/std/tests/P1135R6_atomic_wait/test.cpp
@@ -0,0 +1,10 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "test_atomic_wait.hpp"
+
+int main() {
+    assert(__std_atomic_set_api_level(__std_atomic_api_level::__has_wait_on_address)
+           == __std_atomic_api_level::__has_wait_on_address);
+    test_atomic_wait();
+}
diff --git a/tests/std/tests/P1135R6_atomic_wait_vista/env.lst b/tests/std/tests/P1135R6_atomic_wait_vista/env.lst
new file mode 100644
index 0000000000..642f530ffa
--- /dev/null
+++ b/tests/std/tests/P1135R6_atomic_wait_vista/env.lst
@@ -0,0 +1,4 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE ..\usual_latest_matrix.lst
diff --git a/tests/std/tests/P1135R6_atomic_wait_vista/test.cpp b/tests/std/tests/P1135R6_atomic_wait_vista/test.cpp
new file mode 100644
index 0000000000..bc42736559
--- /dev/null
+++ b/tests/std/tests/P1135R6_atomic_wait_vista/test.cpp
@@ -0,0 +1,9 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "test_atomic_wait.hpp"
+
+int main() {
+    assert(__std_atomic_set_api_level(__std_atomic_api_level::__has_srwlock) == __std_atomic_api_level::__has_srwlock);
+    test_atomic_wait();
+}
diff --git a/tests/std/tests/VSO_0157762_feature_test_macros/test.cpp b/tests/std/tests/VSO_0157762_feature_test_macros/test.cpp
index 880ccf1135..8f3dc6c3d3
--- a/tests/std/tests/VSO_0157762_feature_test_macros/test.cpp
+++ b/tests/std/tests/VSO_0157762_feature_test_macros/test.cpp
@@ -161,6 +161,20 @@ STATIC_ASSERT(__cpp_lib_atomic_shared_ptr == 201711L);
 STATIC_ASSERT(__cpp_lib_atomic_value_initialization == 201911L);
 #endif

+#if _HAS_CXX20
+#ifndef __cpp_lib_atomic_wait
+#error __cpp_lib_atomic_wait is not defined
+#elif __cpp_lib_atomic_wait != 201907L
+#error __cpp_lib_atomic_wait is not 201907L
+#else
+STATIC_ASSERT(__cpp_lib_atomic_wait == 201907L);
+#endif
+#else
+#ifdef __cpp_lib_atomic_wait
+#error __cpp_lib_atomic_wait is defined
+#endif
+#endif
+
 #if _HAS_CXX20
 #ifndef __cpp_lib_bind_front
 #error __cpp_lib_bind_front is not defined
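Downstream code can gate on the macro verified above in the usual way; a guarded use of the C++20 API (illustrative):

#include <atomic>
#include <version>

void spin_or_block(std::atomic<int>& flag) {
#if defined(__cpp_lib_atomic_wait) && __cpp_lib_atomic_wait >= 201907L
    flag.wait(0); // efficient blocking wait while the value is still 0
#else
    while (flag.load(std::memory_order_acquire) == 0) { /* spin */
    }
#endif
}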