From 0f822ff04cf29f1a0e38684f05fcf5f88f9bd1f0 Mon Sep 17 00:00:00 2001
From: Wesley Maxey
Date: Thu, 2 Jun 2022 15:36:02 -0700
Subject: [PATCH] Merge internal changes from CTK 11.7 to libcudacxx.

---
 include/cuda/std/detail/__config              |  1 +
 .../cuda/std/detail/libcxx/include/semaphore  |  4 +--
 .../include/support/atomic/atomic_cuda.h      | 34 +++++++++----------
 3 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/include/cuda/std/detail/__config b/include/cuda/std/detail/__config
index 22919a3dc9..c6c233177d 100644
--- a/include/cuda/std/detail/__config
+++ b/include/cuda/std/detail/__config
@@ -88,6 +88,7 @@
 #endif
 #define _LIBCUDACXX_HAS_EXTERNAL_ATOMIC_IMP
 #define _LIBCUDACXX_ATOMIC_ALWAYS_LOCK_FREE(size, ptr) (size <= 8)
+#define _LIBCUDACXX_HAS_NO_CXX20_CHRONO_LITERALS
 
 #define _LIBCUDACXX_SYS_CLOCK_DURATION nanoseconds
 
diff --git a/include/cuda/std/detail/libcxx/include/semaphore b/include/cuda/std/detail/libcxx/include/semaphore
index 527eb933ef..bc51f5f4a5 100644
--- a/include/cuda/std/detail/libcxx/include/semaphore
+++ b/include/cuda/std/detail/libcxx/include/semaphore
@@ -105,7 +105,7 @@ class __atomic_semaphore_base
     _LIBCUDACXX_INLINE_VISIBILITY
     bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time)
     {
-        return __libcpp_thread_poll_with_backoff([=]() {
+        return __libcpp_thread_poll_with_backoff([this]() {
             ptrdiff_t const __old = __count.load(memory_order_acquire);
             return __old != 0 && __fetch_sub_if_slow(__old);
         }, __rel_time);
@@ -180,7 +180,7 @@ class __atomic_semaphore_base<_Sco, 1> {
     _LIBCUDACXX_INLINE_VISIBILITY
     bool __acquire_slow_timed(chrono::nanoseconds const& __rel_time)
     {
-        return __libcpp_thread_poll_with_backoff([=]() {
+        return __libcpp_thread_poll_with_backoff([this]() {
             return try_acquire();
         }, __rel_time);
     }
diff --git a/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h b/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h
index e61d0b6cb6..35a7cd274b 100644
--- a/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h
+++ b/include/cuda/std/detail/libcxx/include/support/atomic/atomic_cuda.h
@@ -72,7 +72,7 @@ inline void __cxx_atomic_thread_fence(memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      __atomic_thread_fence_cuda(__order, __thread_scope_system_tag());
+      __atomic_thread_fence_cuda(static_cast<__memory_order_underlying_t>(__order), __thread_scope_system_tag());
     ),
     NV_IS_HOST, (
       __host::__cxx_atomic_thread_fence(__order);
@@ -85,7 +85,7 @@ inline void __cxx_atomic_signal_fence(memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      __atomic_signal_fence_cuda(__order);
+      __atomic_signal_fence_cuda(static_cast<__memory_order_underlying_t>(__order));
     ),
     NV_IS_HOST, (
       __host::__cxx_atomic_signal_fence(__order);
@@ -181,7 +181,7 @@ __host__ __device__
   alignas(_Tp) auto __tmp = __val;
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      __atomic_store_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, __order, __scope_tag<_Sco>());
+      __atomic_store_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       __host::__cxx_atomic_store(&__a->__a_value, __tmp, __order);
@@ -194,7 +194,7 @@ __host__ __device__
 _Tp __cxx_atomic_load(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> const volatile* __a, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_load_n_cuda(__cxx_get_underlying_device_atomic(__a), __order, __scope_tag<_Sco>());
+      return __atomic_load_n_cuda(__cxx_get_underlying_device_atomic(__a), static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_load(&__a->__a_value, __order);
@@ -208,7 +208,7 @@ __host__ __device__
   alignas(_Tp) auto __tmp = __val;
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_exchange_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, __order, __scope_tag<_Sco>());
+      return __atomic_exchange_n_cuda(__cxx_get_underlying_device_atomic(__a), __tmp, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_exchange(&__a->__a_value, __tmp, __order);
@@ -224,7 +224,7 @@ __host__ __device__
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
       alignas(_Tp) auto __tmp_v = __val;
-      __result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, false, __success, __failure, __scope_tag<_Sco>());
+      __result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, false, static_cast<__memory_order_underlying_t>(__success), static_cast<__memory_order_underlying_t>(__failure), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       __result = __host::__cxx_atomic_compare_exchange_strong(&__a->__a_value, &__tmp, __val, __success, __failure);
@@ -242,7 +242,7 @@ __host__ __device__
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
       alignas(_Tp) auto __tmp_v = __val;
-      __result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, true, __success, __failure, __scope_tag<_Sco>());
+      __result = __atomic_compare_exchange_cuda(__cxx_get_underlying_device_atomic(__a), &__tmp, &__tmp_v, true, static_cast<__memory_order_underlying_t>(__success), static_cast<__memory_order_underlying_t>(__failure), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       __result = __host::__cxx_atomic_compare_exchange_weak(&__a->__a_value, &__tmp, __val, __success, __failure);
@@ -257,7 +257,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_add(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __delta, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_add(&__a->__a_value, __delta, __order);
@@ -270,7 +270,7 @@ __host__ __device__
 _Tp* __cxx_atomic_fetch_add(__cxx_atomic_base_heterogeneous_impl<_Tp*, _Sco, _Ref> volatile* __a, ptrdiff_t __delta, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_add_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_add(&__a->__a_value, __delta, __order);
@@ -283,7 +283,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __delta, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_sub(&__a->__a_value, __delta, __order);
@@ -296,7 +296,7 @@ __host__ __device__
 _Tp* __cxx_atomic_fetch_sub(__cxx_atomic_base_heterogeneous_impl<_Tp*, _Sco, _Ref> volatile* __a, ptrdiff_t __delta, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_sub_cuda(__cxx_get_underlying_device_atomic(__a), __delta, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_sub(&__a->__a_value, __delta, __order);
@@ -309,7 +309,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_and(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_and_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_and_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_and(&__a->__a_value, __pattern, __order);
@@ -322,7 +322,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_or(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_or_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_or_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_or(&__a->__a_value, __pattern, __order);
@@ -335,7 +335,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_xor(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Tp __pattern, memory_order __order) {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_xor_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_xor_cuda(__cxx_get_underlying_device_atomic(__a), __pattern, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ),
     NV_IS_HOST, (
       return __host::__cxx_atomic_fetch_xor(&__a->__a_value, __pattern, __order);
@@ -348,7 +348,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_max(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Delta __val, memory_order __order) {
   NV_IF_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_max_cuda(__cxx_get_underlying_device_atomic(__a), __val, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_max_cuda(__cxx_get_underlying_device_atomic(__a), __val, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ), (
       return __host::__cxx_atomic_fetch_max(&__a->__a_value, __val, __order);
     )
@@ -360,7 +360,7 @@ __host__ __device__
 _Tp __cxx_atomic_fetch_min(__cxx_atomic_base_heterogeneous_impl<_Tp, _Sco, _Ref> volatile* __a, _Delta __val, memory_order __order) {
   NV_IF_TARGET(
     NV_IS_DEVICE, (
-      return __atomic_fetch_min_cuda(__cxx_get_underlying_device_atomic(__a), __val, __order, __scope_tag<_Sco>());
+      return __atomic_fetch_min_cuda(__cxx_get_underlying_device_atomic(__a), __val, static_cast<__memory_order_underlying_t>(__order), __scope_tag<_Sco>());
     ), (
       return __host::__cxx_atomic_fetch_min(&__a->__a_value, __val, __order);
     )
@@ -428,7 +428,7 @@ __host__ __device__ inline bool __cxx_atomic_compare_exchange_weak(__cxx_atomic_
   auto const __actual = __cxx_small_from_32<_Tp>(__temp);
   if(!__ret) {
     if(0 == __cuda_memcmp(&__actual, __expected, sizeof(_Tp)))
-      __cxx_atomic_fetch_and(&__a->__a_value, (1u << (8*sizeof(_Tp))) - 1, memory_order::memory_order_relaxed);
+      __cxx_atomic_fetch_and(&__a->__a_value, (1u << (8*sizeof(_Tp))) - 1, memory_order_relaxed);
     else
       *__expected = __actual;
   }
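
Note (not part of the patch): a minimal host-side C++ sketch of the two patterns the patch applies throughout. The semaphore hunks replace [=] with an explicit [this] capture, since implicit capture of this via [=] is deprecated in C++20; the atomic_cuda.h hunks cast a memory_order value to its underlying integral type before handing it to the lower-level device primitives. Names below (counter, memory_order_underlying_t, read_relaxed) are illustrative stand-ins, not libcudacxx identifiers.

#include <atomic>
#include <cstdio>
#include <type_traits>

// Illustrative stand-in for __memory_order_underlying_t: the integral type
// behind std::memory_order, which C-style atomic entry points often take.
using memory_order_underlying_t = std::underlying_type_t<std::memory_order>;

struct counter {
    std::atomic<int> value{0};

    // [this] captures the object pointer explicitly; [=] would rely on the
    // implicit this-capture that C++20 deprecates, with identical behavior.
    int read_relaxed() const {
        auto load = [this]() {
            return value.load(std::memory_order_relaxed);
        };
        return load();
    }
};

int main() {
    counter c;
    c.value.store(42, std::memory_order_relaxed);

    // The same kind of cast the patch adds at each device call site:
    // memory_order -> its underlying integer type.
    auto order = static_cast<memory_order_underlying_t>(std::memory_order_acquire);

    std::printf("value=%d order=%d\n", c.read_relaxed(), static_cast<int>(order));
    return 0;
}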