Wrap/Unwrap store, exchange, and load to make sure the 'written to' parts of atomics are aligned
NVIDIA · Jul 29, 2021 · efa59b9 · efa59b9
1 parent eb17621
commit efa59b9
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 10 deletions.
diff --git a/.upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp b/.upstream-tests/test/cuda/bad_atomic_alignment.pass.cpp
@@ -33,6 +33,8 @@ int main(int argc, char ** argv)
     static_assert(alignof(key) == 4, "");
     cuda::atomic<key> k;
     auto r = k.load();
+    k.store(r);
+    (void)k.exchange(r);
     unused(r);
   }
   // Test forcibly aligned user type
@@ -44,6 +46,8 @@ int main(int argc, char ** argv)
     static_assert(alignof(key) == 8, "");
     cuda::atomic<key> k;
     auto r = k.load();
+    k.store(r);
+    (void)k.exchange(r);
     unused(r);
   }
   return 0;

diff --git a/libcxx/include/support/atomic/atomic_base.h b/libcxx/include/support/atomic/atomic_base.h
@@ -11,9 +11,12 @@
 #ifndef _LIBCUDACXX_ATOMIC_BASE_H
 #define _LIBCUDACXX_ATOMIC_BASE_H
 
+#include <type_traits>
+
 template <typename _Tp, int _Sco>
 struct __cxx_atomic_base_impl {
-  using __cxx_underlying_type = _Tp;
+  using __underlying_t = _Tp;
+  static constexpr int __sco = _Sco;
 
   _LIBCUDACXX_CONSTEXPR
   __cxx_atomic_base_impl() _NOEXCEPT = default;
@@ -50,7 +53,8 @@ const volatile _Tp* __cxx_get_underlying_atomic(__cxx_atomic_base_impl<_Tp, _Sco
 
 template <typename _Tp, int _Sco>
 struct __cxx_atomic_ref_base_impl {
-  using __cxx_underlying_type = _Tp;
+  using __underlying_t = _Tp;
+  static constexpr int __sco = _Sco;
 
   _LIBCUDACXX_CONSTEXPR
   __cxx_atomic_ref_base_impl() _NOEXCEPT = default;
@@ -91,7 +95,7 @@ _LIBCUDACXX_INLINE_VISIBILITY auto __cxx_atomic_base_unwrap(_Tp* __a) _NOEXCEPT
 }
 
 template <typename _Tp>
-using __cxx_atomic_underlying_t = typename _Tp::__cxx_underlying_type;
+using __cxx_atomic_underlying_t = typename _Tp::__underlying_t;
 
 _LIBCUDACXX_INLINE_VISIBILITY inline _LIBCUDACXX_CONSTEXPR int __to_gcc_order(memory_order __order) {
   // Avoid switch statement to make this a constexpr.
@@ -140,27 +144,29 @@ inline void __cxx_atomic_store(_Tp* __a,  _Up __val,
                         memory_order __order) {
   auto __a_tmp = __cxx_atomic_base_unwrap(__a);
   (void)__a_tmp;
-  __atomic_store(__a_tmp, &__val, __to_gcc_order(__order));
+  __cxx_atomic_base_impl<__cxx_atomic_underlying_t<_Tp>, _Tp::__sco> __v_temp(__val);
+  __atomic_store(__a, &__v_temp, __to_gcc_order(__order));
 }
 
 template <typename _Tp>
 inline auto __cxx_atomic_load(const _Tp* __a,
                        memory_order __order) -> __cxx_atomic_underlying_t<_Tp> {
   auto __a_tmp = __cxx_atomic_base_unwrap(__a);
   (void)__a_tmp;
-  __cxx_atomic_underlying_t<_Tp> __ret;
-  __atomic_load(__a_tmp, &__ret, __to_gcc_order(__order));
-  return __ret;
+  __cxx_atomic_base_impl<__cxx_atomic_underlying_t<_Tp>, _Tp::__sco> __ret;
+  __atomic_load(__a, &__ret, __to_gcc_order(__order));
+  return __ret.__a_value;
 }
 
 template <typename _Tp, typename _Up>
 inline auto __cxx_atomic_exchange(_Tp* __a, _Up __value,
                           memory_order __order) -> __cxx_atomic_underlying_t<_Tp> {
   auto __a_tmp = __cxx_atomic_base_unwrap(__a);
   (void)__a_tmp;
-  __cxx_atomic_underlying_t<_Tp> __ret;
-  __atomic_exchange(__a_tmp, &__value, &__ret, __to_gcc_order(__order));
-  return __ret;
+  __cxx_atomic_base_impl<__cxx_atomic_underlying_t<_Tp>, _Tp::__sco> __v_temp(__value);
+  __cxx_atomic_base_impl<__cxx_atomic_underlying_t<_Tp>, _Tp::__sco> __ret;
+  __atomic_exchange(__a, &__v_temp, &__ret, __to_gcc_order(__order));
+  return __ret.__a_value;
 }
 
 template <typename _Tp, typename _Up>