Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion sycl/include/CL/sycl/handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2688,7 +2688,8 @@ class __SYCL_EXPORT handler {
class Algorithm>
friend class ext::oneapi::detail::reduction_impl_algo;

// This method needs to call the method finalize().
// This method needs to call the method finalize() and also access to private
// ctor/dtor.
template <typename Reduction, typename... RestT>
std::enable_if_t<!Reduction::is_usm> friend ext::oneapi::detail::
reduSaveFinalResultToUserMemHelper(
Expand Down
97 changes: 39 additions & 58 deletions sycl/include/sycl/ext/oneapi/reduction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,57 +214,56 @@ template <class Reducer> class combiner {
: memory_scope::device;
}

template <access::address_space Space, class T, class AtomicFunctor>
void atomic_combine_impl(T *ReduVarPtr, AtomicFunctor Functor) const {
auto reducer = static_cast<const Reducer *>(this);
for (size_t E = 0; E < Extent; ++E) {
auto AtomicRef =
atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
multi_ptr<T, Space>(ReduVarPtr)[E]);
Functor(AtomicRef, reducer->getElement(E));
}
}

template <class _T, access::address_space Space, class BinaryOperation>
static inline constexpr bool BasicCheck =
std::is_same<typename remove_AS<_T>::type, T>::value &&
(Space == access::address_space::global_space ||
Space == access::address_space::local_space);

public:
/// Atomic ADD operation: *ReduVarPtr += MValue;
template <access::address_space Space = access::address_space::global_space,
typename _T = T, class _BinaryOperation = BinaryOperation>
enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
enable_if_t<BasicCheck<_T, Space, _BinaryOperation> &&
(IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value ||
IsReduOptForAtomic64Add<T, _BinaryOperation>::value) &&
sycl::detail::IsPlus<T, _BinaryOperation>::value &&
(Space == access::address_space::global_space ||
Space == access::address_space::local_space)>
sycl::detail::IsPlus<T, _BinaryOperation>::value>
atomic_combine(_T *ReduVarPtr) const {
auto reducer = static_cast<const Reducer *>(this);
for (size_t E = 0; E < Extent; ++E) {
atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
multi_ptr<T, Space>(ReduVarPtr)[E])
.fetch_add(reducer->getElement(E));
}
atomic_combine_impl<Space>(
ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_add(Val); });
}

/// Atomic BITWISE OR operation: *ReduVarPtr |= MValue;
template <access::address_space Space = access::address_space::global_space,
typename _T = T, class _BinaryOperation = BinaryOperation>
enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
enable_if_t<BasicCheck<_T, Space, _BinaryOperation> &&
IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
sycl::detail::IsBitOR<T, _BinaryOperation>::value &&
(Space == access::address_space::global_space ||
Space == access::address_space::local_space)>
sycl::detail::IsBitOR<T, _BinaryOperation>::value>
atomic_combine(_T *ReduVarPtr) const {
auto reducer = static_cast<const Reducer *>(this);
for (size_t E = 0; E < Extent; ++E) {
atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
multi_ptr<T, Space>(ReduVarPtr)[E])
.fetch_or(reducer->getElement(E));
}
atomic_combine_impl<Space>(
ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_or(Val); });
}

/// Atomic BITWISE XOR operation: *ReduVarPtr ^= MValue;
template <access::address_space Space = access::address_space::global_space,
typename _T = T, class _BinaryOperation = BinaryOperation>
enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
enable_if_t<BasicCheck<_T, Space, _BinaryOperation> &&
IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
sycl::detail::IsBitXOR<T, _BinaryOperation>::value &&
(Space == access::address_space::global_space ||
Space == access::address_space::local_space)>
sycl::detail::IsBitXOR<T, _BinaryOperation>::value>
atomic_combine(_T *ReduVarPtr) const {
auto reducer = static_cast<const Reducer *>(this);
for (size_t E = 0; E < Extent; ++E) {
atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
multi_ptr<T, Space>(ReduVarPtr)[E])
.fetch_xor(reducer->getElement(E));
}
atomic_combine_impl<Space>(
ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_xor(Val); });
}

/// Atomic BITWISE AND operation: *ReduVarPtr &= MValue;
Expand All @@ -276,46 +275,30 @@ template <class Reducer> class combiner {
(Space == access::address_space::global_space ||
Space == access::address_space::local_space)>
atomic_combine(_T *ReduVarPtr) const {
auto reducer = static_cast<const Reducer *>(this);
for (size_t E = 0; E < Extent; ++E) {
atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
multi_ptr<T, Space>(ReduVarPtr)[E])
.fetch_and(reducer->getElement(E));
}
atomic_combine_impl<Space>(
ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_and(Val); });
}

/// Atomic MIN operation: *ReduVarPtr = sycl::minimum(*ReduVarPtr, MValue);
template <access::address_space Space = access::address_space::global_space,
typename _T = T, class _BinaryOperation = BinaryOperation>
enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
enable_if_t<BasicCheck<_T, Space, _BinaryOperation> &&
IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
sycl::detail::IsMinimum<T, _BinaryOperation>::value &&
(Space == access::address_space::global_space ||
Space == access::address_space::local_space)>
sycl::detail::IsMinimum<T, _BinaryOperation>::value>
atomic_combine(_T *ReduVarPtr) const {
auto reducer = static_cast<const Reducer *>(this);
for (size_t E = 0; E < Extent; ++E) {
atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
multi_ptr<T, Space>(ReduVarPtr)[E])
.fetch_min(reducer->getElement(E));
}
atomic_combine_impl<Space>(
ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_min(Val); });
}

/// Atomic MAX operation: *ReduVarPtr = sycl::maximum(*ReduVarPtr, MValue);
template <access::address_space Space = access::address_space::global_space,
typename _T = T, class _BinaryOperation = BinaryOperation>
enable_if_t<std::is_same<typename remove_AS<_T>::type, T>::value &&
enable_if_t<BasicCheck<_T, Space, _BinaryOperation> &&
IsReduOptForFastAtomicFetch<T, _BinaryOperation>::value &&
sycl::detail::IsMaximum<T, _BinaryOperation>::value &&
(Space == access::address_space::global_space ||
Space == access::address_space::local_space)>
sycl::detail::IsMaximum<T, _BinaryOperation>::value>
atomic_combine(_T *ReduVarPtr) const {
auto reducer = static_cast<const Reducer *>(this);
for (size_t E = 0; E < Extent; ++E) {
atomic_ref<T, memory_order::relaxed, getMemoryScope<Space>(), Space>(
multi_ptr<T, Space>(ReduVarPtr)[E])
.fetch_max(reducer->getElement(E));
}
atomic_combine_impl<Space>(
ReduVarPtr, [](auto Ref, auto Val) { return Ref.fetch_max(Val); });
}
};

Expand Down Expand Up @@ -415,8 +398,6 @@ class reducer<T, BinaryOperation, Dims, Extent, Algorithm, View,
reducer(const T &Identity, BinaryOperation BOp)
: MValue(Identity), MIdentity(Identity), MBinaryOp(BOp) {}

// SYCL 2020 revision 4 says this should be const, but this is a bug
// see https://github.com/KhronosGroup/SYCL-Docs/pull/252
reducer<T, BinaryOperation, Dims - 1, Extent, Algorithm, true>
operator[](size_t Index) {
return {MValue[Index], MBinaryOp};
Expand Down