From 8c68366b30d5fea5d775353b5f8a14b22a5c6fe0 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Wed, 13 Jul 2022 16:49:17 -0700 Subject: [PATCH 01/24] [NFC][SYCL] Remove Algorithm from reducer's template params It isn't used for anything and the reducer is really lower level entity than the reduction algorithm. --- sycl/include/sycl/ext/oneapi/reduction.hpp | 70 ++++++++++------------ 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 3005dc3417944..a30db6672ae61 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -124,17 +124,17 @@ __SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize, /// functions and representing users' reduction variable. /// The generic version of the class represents those reductions of those /// types and operations for which the identity value is not known. -/// The Algorithm template can be used to specialize reducers for different -/// reduction algorithms. The View template describes whether the reducer -/// owns its data or not: if View is 'true', then the reducer does not own -/// its data and instead provides a view of data allocated elsewhere (i.e. -/// via a reference or pointer member); if View is 'false', then the reducer -/// owns its data. With the current default reduction algorithm, the top-level -/// reducers that are passed to the user's lambda contain a private copy of -/// the reduction variable, whereas any reducer created by a subscript operator -/// contains a reference to a reduction variable allocated elsewhere. +/// The View template describes whether the reducer owns its data or not: if +/// View is 'true', then the reducer does not own its data and instead provides +/// a view of data allocated elsewhere (i.e. via a reference or pointer member); +/// if View is 'false', then the reducer owns its data. With the current default +/// reduction algorithm, the top-level reducers that are passed to the user's +/// lambda contain a private copy of the reduction variable, whereas any reducer +/// created by a subscript operator contains a reference to a reduction variable +/// allocated elsewhere. The Subst parameter is an implementation detail and is +/// used to spell out restrictions using 'enable_if'. template + bool View = false, typename Subst = void> class reducer; /// Helper class for accessing reducer-defined types in CRTP @@ -142,9 +142,8 @@ class reducer; template struct ReducerTraits; template -struct ReducerTraits< - reducer> { + bool View, typename Subst> +struct ReducerTraits> { using type = T; using op = BinaryOperation; static constexpr int dims = Dims; @@ -319,14 +318,13 @@ template class combiner { /// /// It stores a copy of the identity and binary operation associated with the /// reduction. -template +template class reducer< - T, BinaryOperation, Dims, Extent, Algorithm, View, + T, BinaryOperation, Dims, Extent, View, enable_if_t::value>> : public combiner< - reducer::value>>> { @@ -352,14 +350,13 @@ class reducer< /// /// It allows to reduce the size of the 'reducer' object by not holding /// the identity field inside it and allows to add a default constructor. -template +template class reducer< - T, BinaryOperation, Dims, Extent, Algorithm, View, + T, BinaryOperation, Dims, Extent, View, enable_if_t::value>> : public combiner< - reducer::value>>> { @@ -383,11 +380,10 @@ class reducer< /// Component of 'reducer' class for array reductions, representing a single /// element of the span (as returned by the subscript operator). -template -class reducer +class reducer> - : public combiner>> { public: reducer(T &Ref, BinaryOperation BOp) : MElement(Ref), MBinaryOp(BOp) {} @@ -401,13 +397,12 @@ class reducer +template class reducer< - T, BinaryOperation, Dims, Extent, Algorithm, View, + T, BinaryOperation, Dims, Extent, View, enable_if_t::value>> - : public combiner::value>>> { @@ -415,8 +410,7 @@ class reducer< reducer(const T &Identity, BinaryOperation BOp) : MValue(Identity), MIdentity(Identity), MBinaryOp(BOp) {} - reducer - operator[](size_t Index) { + reducer operator[](size_t Index) { return {MValue[Index], MBinaryOp}; } @@ -432,13 +426,12 @@ class reducer< /// Specialization of 'reducer' class for array reductions accepting a span /// in cases where the identity value is known. -template +template class reducer< - T, BinaryOperation, Dims, Extent, Algorithm, View, + T, BinaryOperation, Dims, Extent, View, enable_if_t::value>> - : public combiner::value>>> { @@ -448,8 +441,7 @@ class reducer< // SYCL 2020 revision 4 says this should be const, but this is a bug // see https://github.com/KhronosGroup/SYCL-Docs/pull/252 - reducer - operator[](size_t Index) { + reducer operator[](size_t Index) { return {MValue[Index], BinaryOperation()}; } @@ -525,9 +517,7 @@ class reduction_impl_algo< using base = reduction_impl_common; public: - using reducer_type = - reducer>; + using reducer_type = reducer; using result_type = T; using binary_operation = BinaryOperation; From 417df4112391f9cd7eadb7f6220df53bd9a07c38 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 12:51:37 -0700 Subject: [PATCH 02/24] [WIP] SFINAE-out reduction_impl ctors --- sycl/include/sycl/ext/oneapi/reduction.hpp | 114 +++++++++++++++------ sycl/include/sycl/handler.hpp | 4 +- sycl/include/sycl/reduction.hpp | 43 +++++--- 3 files changed, 109 insertions(+), 52 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index a30db6672ae61..74c62f8219f62 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -503,15 +503,16 @@ class default_reduction_algorithm {}; /// Templated class for implementations of specific reduction algorithms template + typename RedOutVar, class Algorithm> class reduction_impl_algo; /// Original reduction algorithm is the default. It supports both USM and /// accessors via a single class template + typename RedOutVar, bool IsUSM, access::placeholder IsPlaceholder, + int AccessorDims> class reduction_impl_algo< - T, BinaryOperation, Dims, Extent, + T, BinaryOperation, Dims, Extent, RedOutVar, default_reduction_algorithm> : public reduction_impl_common { using base = reduction_impl_common; @@ -534,6 +535,7 @@ class reduction_impl_algo< access::target::device, IsPlaceholder, ext::oneapi::accessor_property_list<>>; + static constexpr bool has_atomic_add_float64 = IsReduOptForAtomic64Add::value; static constexpr bool has_fast_atomics = @@ -705,6 +707,7 @@ class reduction_impl_algo< return Acc; } + RedOutVar *MRedOut; /// User's accessor to where the reduction must be written. std::shared_ptr MRWAcc; std::shared_ptr MDWAcc; @@ -731,16 +734,38 @@ template struct AreAllButLastReductions { static constexpr bool value = !std::is_base_of>::value; }; +template +struct is_rw_acc_t : public std::false_type {}; + +template +struct is_rw_acc_t< + accessor>> + : public std::true_type {}; + +template +struct is_dw_acc_t : public std::false_type {}; + +template +struct is_dw_acc_t>> + : public std::true_type {}; /// This class encapsulates the reduction variable/accessor, /// the reduction operator and an optional operator identity. template + typename RedOutVar, class Algorithm> class reduction_impl : private reduction_impl_base, - public reduction_impl_algo { + public reduction_impl_algo { private: - using algo = reduction_impl_algo; + using algo = reduction_impl_algo; + static constexpr bool my_is_usm = std::is_same_v; + static constexpr bool my_is_rw_acc = is_rw_acc_t::value; + static constexpr bool my_is_dw_acc = is_dw_acc_t::value; public: using reducer_type = typename algo::reducer_type; @@ -753,8 +778,9 @@ class reduction_impl /// SYCL-2020. /// Constructs reduction_impl when the identity value is statically known. template ::value> * = nullptr> + std::enable_if_t< + sycl::detail::IsKnownIdentityOp<_T, BinaryOperation>::value && + my_is_rw_acc> * = nullptr> reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH, bool InitializeToIdentity) : algo(reducer_type::getIdentity(), BinaryOperation(), @@ -767,8 +793,10 @@ class reduction_impl } /// Constructs reduction_impl when the identity value is statically known. - template ::value> * = nullptr> + template < + typename _T = T, + enable_if_t::value && + my_is_rw_acc> * = nullptr> reduction_impl(rw_accessor_type &Acc) : algo(reducer_type::getIdentity(), BinaryOperation(), false, std::make_shared(Acc)) { @@ -779,8 +807,10 @@ class reduction_impl } /// Constructs reduction_impl when the identity value is statically known. - template ::value> * = nullptr> + template < + typename _T = T, + enable_if_t::value && + my_is_dw_acc> * = nullptr> reduction_impl(dw_accessor_type &Acc) : algo(reducer_type::getIdentity(), BinaryOperation(), true, std::make_shared(Acc)) { @@ -795,8 +825,8 @@ class reduction_impl /// and user still passed the identity value. template < typename _T, typename AllocatorT, - enable_if_t::value> - * = nullptr> + enable_if_t::value && + my_is_rw_acc> * = nullptr> reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH, const T & /*Identity*/, BinaryOperation, bool InitializeToIdentity) @@ -822,8 +852,10 @@ class reduction_impl /// Constructs reduction_impl when the identity value is statically known, /// and user still passed the identity value. - template ::value> * = nullptr> + template < + typename _T = T, + enable_if_t::value && + my_is_rw_acc> * = nullptr> reduction_impl(rw_accessor_type &Acc, const T & /*Identity*/, BinaryOperation) : algo(reducer_type::getIdentity(), BinaryOperation(), false, std::make_shared(Acc)) { @@ -846,8 +878,10 @@ class reduction_impl /// Constructs reduction_impl when the identity value is statically known, /// and user still passed the identity value. - template ::value> * = nullptr> + template < + typename _T = T, + enable_if_t::value && + my_is_dw_acc> * = nullptr> reduction_impl(dw_accessor_type &Acc, const T & /*Identity*/, BinaryOperation) : algo(reducer_type::getIdentity(), BinaryOperation(), true, std::make_shared(Acc)) { @@ -870,10 +904,10 @@ class reduction_impl /// SYCL-2020. /// Constructs reduction_impl when the identity value is NOT known statically. - template < - typename _T, typename AllocatorT, - enable_if_t::value> - * = nullptr> + template ::value && + my_is_rw_acc> * = nullptr> reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH, const T &Identity, BinaryOperation BOp, bool InitializeToIdentity) @@ -888,7 +922,8 @@ class reduction_impl /// Constructs reduction_impl when the identity value is unknown. template ::value> * = nullptr> + _T, BinaryOperation>::value && + my_is_rw_acc> * = nullptr> reduction_impl(rw_accessor_type &Acc, const T &Identity, BinaryOperation BOp) : algo(Identity, BOp, false, std::make_shared(Acc)) { if (Acc.size() != 1) @@ -899,7 +934,8 @@ class reduction_impl /// Constructs reduction_impl when the identity value is unknown. template ::value> * = nullptr> + _T, BinaryOperation>::value && + my_is_dw_acc> * = nullptr> reduction_impl(dw_accessor_type &Acc, const T &Identity, BinaryOperation BOp) : algo(Identity, BOp, true, std::make_shared(Acc)) { if (Acc.size() != 1) @@ -912,8 +948,10 @@ class reduction_impl /// The \param VarPtr is a USM pointer to memory, to where the computed /// reduction value is added using BinaryOperation, i.e. it is expected that /// the memory is pre-initialized with some meaningful value. - template ::value> * = nullptr> + template < + typename _T = T, + enable_if_t::value && + my_is_usm> * = nullptr> reduction_impl(T *VarPtr, bool InitializeToIdentity = false) : algo(reducer_type::getIdentity(), BinaryOperation(), InitializeToIdentity, VarPtr) {} @@ -923,8 +961,10 @@ class reduction_impl /// The \param VarPtr is a USM pointer to memory, to where the computed /// reduction value is added using BinaryOperation, i.e. it is expected that /// the memory is pre-initialized with some meaningful value. - template ::value> * = nullptr> + template < + typename _T = T, + enable_if_t::value && + my_is_usm> * = nullptr> reduction_impl(T *VarPtr, const T &Identity, BinaryOperation, bool InitializeToIdentity = false) : algo(Identity, BinaryOperation(), InitializeToIdentity, VarPtr) { @@ -946,7 +986,8 @@ class reduction_impl /// reduction value is added using BinaryOperation, i.e. it is expected that /// the memory is pre-initialized with some meaningful value. template ::value> * = nullptr> + _T, BinaryOperation>::value && + my_is_usm> * = nullptr> reduction_impl(T *VarPtr, const T &Identity, BinaryOperation BOp, bool InitializeToIdentity = false) : algo(Identity, BOp, InitializeToIdentity, VarPtr) {} @@ -960,8 +1001,10 @@ class reduction_impl /// Constructs reduction_impl when the identity value is statically known /// and user passed an identity value anyway - template ::value> * = nullptr> + template < + typename _T = T, + enable_if_t::value && + my_is_usm> * = nullptr> reduction_impl(span<_T, Extent> Span, const T & /* Identity */, BinaryOperation BOp, bool InitializeToIdentity = false) : algo(reducer_type::getIdentity(), BOp, InitializeToIdentity, @@ -969,7 +1012,8 @@ class reduction_impl /// Constructs reduction_impl when the identity value is not statically known template ::value> * = nullptr> + _T, BinaryOperation>::value && + my_is_usm> * = nullptr> reduction_impl(span Span, const T &Identity, BinaryOperation BOp, bool InitializeToIdentity = false) : algo(Identity, BOp, InitializeToIdentity, Span.data()) {} @@ -2566,6 +2610,7 @@ tuple_select_elements(TupleT Tuple, std::index_sequence) { template detail::reduction_impl, detail::default_reduction_algorithm> reduction(accessor &Acc, const T &Identity, BinaryOperation BOp) { @@ -2581,6 +2626,7 @@ template ::value, detail::reduction_impl< T, BinaryOperation, 0, 1, + accessor, detail::default_reduction_algorithm>> reduction(accessor &Acc, BinaryOperation) { @@ -2593,7 +2639,7 @@ reduction(accessor &Acc, /// \param Identity, and the binary operation used in the reduction. template detail::reduction_impl< - T, BinaryOperation, 0, 1, + T, BinaryOperation, 0, 1, T *, detail::default_reduction_algorithm> reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { return {VarPtr, Identity, BOp}; @@ -2607,7 +2653,7 @@ reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { template std::enable_if_t< sycl::detail::IsKnownIdentityOp::value, - detail::reduction_impl>> reduction(T *VarPtr, BinaryOperation) { diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index c6ee4bc17c675..20cf4b1f8c683 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -242,7 +242,7 @@ namespace ext { namespace oneapi { namespace detail { template + typename RedOutVar, class Algorithm> class reduction_impl_algo; using cl::sycl::detail::enable_if_t; @@ -2667,7 +2667,7 @@ class __SYCL_EXPORT handler { // Make reduction friends to store buffers and arrays created for it // in handler from reduction methods. template + typename RedOutVar, class Algorithm> friend class ext::oneapi::detail::reduction_impl_algo; #ifndef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 7416f38f31c6a..5e293ff298770 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -21,11 +21,15 @@ namespace sycl { /// Constructs a reduction object using the given buffer \p Var, handler \p CGH, /// reduction operation \p Combiner, and optional reduction properties. template -std::enable_if_t::value, - ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm< - false, access::placeholder::true_t, 1>>> +std::enable_if_t< + has_known_identity::value, + ext::oneapi::detail::reduction_impl< + T, BinaryOperation, 0, 1, + accessor>, + ext::oneapi::detail::default_reduction_algorithm< + false, access::placeholder::true_t, 1>>> reduction(buffer Var, handler &CGH, BinaryOperation, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -38,11 +42,15 @@ reduction(buffer Var, handler &CGH, BinaryOperation, /// The reduction algorithm may be less efficient for this variant as the /// reduction identity is not known statically and it is not provided by user. template -std::enable_if_t::value, - ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm< - false, access::placeholder::true_t, 1>>> +std::enable_if_t< + !has_known_identity::value, + ext::oneapi::detail::reduction_impl< + T, BinaryOperation, 0, 1, + accessor>, + ext::oneapi::detail::default_reduction_algorithm< + false, access::placeholder::true_t, 1>>> reduction(buffer, handler &, BinaryOperation, const property_list &PropList = {}) { // TODO: implement reduction that works even when identity is not known. @@ -58,7 +66,7 @@ reduction(buffer, handler &, BinaryOperation, template std::enable_if_t::value, ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, + T, BinaryOperation, 0, 1, T *, ext::oneapi::detail::default_reduction_algorithm< true, access::placeholder::false_t, 1>>> reduction(T *Var, BinaryOperation, const property_list &PropList = {}) { @@ -75,7 +83,7 @@ reduction(T *Var, BinaryOperation, const property_list &PropList = {}) { template std::enable_if_t::value, ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, + T, BinaryOperation, 0, 1, T *, ext::oneapi::detail::default_reduction_algorithm< true, access::placeholder::false_t, 1>>> reduction(T *, BinaryOperation, const property_list &PropList = {}) { @@ -92,6 +100,9 @@ reduction(T *, BinaryOperation, const property_list &PropList = {}) { template ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, + accessor>, ext::oneapi::detail::default_reduction_algorithm< false, access::placeholder::true_t, 1>> reduction(buffer Var, handler &CGH, const T &Identity, @@ -106,7 +117,7 @@ reduction(buffer Var, handler &CGH, const T &Identity, /// binary operation \p Combiner, and optional reduction properties. template ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, + T, BinaryOperation, 0, 1, T *, ext::oneapi::detail::default_reduction_algorithm< true, access::placeholder::false_t, 1>> reduction(T *Var, const T &Identity, BinaryOperation Combiner, @@ -124,7 +135,7 @@ template std::enable_if_t::value, ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 1, Extent, + T, BinaryOperation, 1, Extent, T *, ext::oneapi::detail::default_reduction_algorithm< true, access::placeholder::false_t, 1>>> reduction(span Span, BinaryOperation, @@ -143,7 +154,7 @@ template std::enable_if_t::value, ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 1, Extent, + T, BinaryOperation, 1, Extent, T *, ext::oneapi::detail::default_reduction_algorithm< true, access::placeholder::false_t, 1>>> reduction(span, BinaryOperation, @@ -161,7 +172,7 @@ reduction(span, BinaryOperation, template std::enable_if_t>> reduction(span Span, const T &Identity, BinaryOperation Combiner, From a2718ef562d1c9e2895e1b6ce3038fc6319530fd Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 13:06:45 -0700 Subject: [PATCH 03/24] Move my_is_* into base class --- sycl/include/sycl/ext/oneapi/reduction.hpp | 48 ++++++++++++---------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 74c62f8219f62..5f6563136d649 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -496,6 +496,24 @@ template class reduction_impl_common { bool InitializeToIdentity; }; +template +struct is_rw_acc_t : public std::false_type {}; + +template +struct is_rw_acc_t< + accessor>> + : public std::true_type {}; + +template +struct is_dw_acc_t : public std::false_type {}; + +template +struct is_dw_acc_t>> + : public std::true_type {}; + /// Types representing specific reduction algorithms /// Enables reduction_impl_algo to take additional algorithm-specific templates template @@ -517,6 +535,11 @@ class reduction_impl_algo< : public reduction_impl_common { using base = reduction_impl_common; +protected: + static constexpr bool my_is_usm = std::is_same_v; + static constexpr bool my_is_rw_acc = is_rw_acc_t::value; + static constexpr bool my_is_dw_acc = is_dw_acc_t::value; + public: using reducer_type = reducer; using result_type = T; @@ -734,24 +757,6 @@ template struct AreAllButLastReductions { static constexpr bool value = !std::is_base_of>::value; }; -template -struct is_rw_acc_t : public std::false_type {}; - -template -struct is_rw_acc_t< - accessor>> - : public std::true_type {}; - -template -struct is_dw_acc_t : public std::false_type {}; - -template -struct is_dw_acc_t>> - : public std::true_type {}; - /// This class encapsulates the reduction variable/accessor, /// the reduction operator and an optional operator identity. template ; - static constexpr bool my_is_usm = std::is_same_v; - static constexpr bool my_is_rw_acc = is_rw_acc_t::value; - static constexpr bool my_is_dw_acc = is_dw_acc_t::value; + + using algo::my_is_usm; + using algo::my_is_rw_acc; + using algo::my_is_dw_acc; public: using reducer_type = typename algo::reducer_type; From 7c686fced54a34211e3d1bb209563e8d07c948f8 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 13:21:39 -0700 Subject: [PATCH 04/24] SFINAE-out base class ctors as well --- sycl/include/sycl/ext/oneapi/reduction.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 5f6563136d649..da8e661b96486 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -534,6 +534,9 @@ class reduction_impl_algo< default_reduction_algorithm> : public reduction_impl_common { using base = reduction_impl_common; + using self = reduction_impl_algo< + T, BinaryOperation, Dims, Extent, RedOutVar, + default_reduction_algorithm>; protected: static constexpr bool my_is_usm = std::is_same_v; @@ -572,12 +575,17 @@ class reduction_impl_algo< static constexpr size_t dims = Dims; static constexpr size_t num_elements = Extent; + template * = nullptr> reduction_impl_algo(const T &Identity, BinaryOperation BinaryOp, bool Init, std::shared_ptr AccPointer) : base(Identity, BinaryOp, Init), MRWAcc(AccPointer){}; + template * = nullptr> reduction_impl_algo(const T &Identity, BinaryOperation BinaryOp, bool Init, std::shared_ptr AccPointer) : base(Identity, BinaryOp, Init), MDWAcc(AccPointer){}; + template * = nullptr> reduction_impl_algo(const T &Identity, BinaryOperation BinaryOp, bool Init, T *USMPointer) : base(Identity, BinaryOp, Init), MUSMPointer(USMPointer){}; From 3b1fd02a94f1073b753148149ae128260fedb0cc Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 13:49:02 -0700 Subject: [PATCH 05/24] Init single reference to user reduction var --- sycl/include/sycl/ext/oneapi/reduction.hpp | 12 ++++++++---- sycl/include/sycl/reduction.hpp | 6 +++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index da8e661b96486..0cd8333d6ccdd 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -579,16 +579,19 @@ class reduction_impl_algo< std::enable_if_t<_self::my_is_rw_acc> * = nullptr> reduction_impl_algo(const T &Identity, BinaryOperation BinaryOp, bool Init, std::shared_ptr AccPointer) - : base(Identity, BinaryOp, Init), MRWAcc(AccPointer){}; + : base(Identity, BinaryOp, Init), MRWAcc(AccPointer), + MRedOut(*AccPointer){}; template * = nullptr> reduction_impl_algo(const T &Identity, BinaryOperation BinaryOp, bool Init, std::shared_ptr AccPointer) - : base(Identity, BinaryOp, Init), MDWAcc(AccPointer){}; + : base(Identity, BinaryOp, Init), MDWAcc(AccPointer), + MRedOut(*AccPointer){}; template * = nullptr> reduction_impl_algo(const T &Identity, BinaryOperation BinaryOp, bool Init, T *USMPointer) - : base(Identity, BinaryOp, Init), MUSMPointer(USMPointer){}; + : base(Identity, BinaryOp, Init), MUSMPointer(USMPointer), + MRedOut(MUSMPointer){}; /// Associates the reduction accessor to user's memory with \p CGH handler /// to keep the accessor alive until the command group finishes the work. @@ -738,7 +741,6 @@ class reduction_impl_algo< return Acc; } - RedOutVar *MRedOut; /// User's accessor to where the reduction must be written. std::shared_ptr MRWAcc; std::shared_ptr MDWAcc; @@ -748,6 +750,8 @@ class reduction_impl_algo< /// USM pointer referencing the memory to where the result of the reduction /// must be written. Applicable/used only for USM reductions. T *MUSMPointer = nullptr; + + RedOutVar &MRedOut; }; /// Predicate returning true if all template type parameters except the last one diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 5e293ff298770..bb8cb269980c6 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -25,7 +25,7 @@ std::enable_if_t< has_known_identity::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - accessor>, ext::oneapi::detail::default_reduction_algorithm< @@ -46,7 +46,7 @@ std::enable_if_t< !has_known_identity::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - accessor>, ext::oneapi::detail::default_reduction_algorithm< @@ -100,7 +100,7 @@ reduction(T *, BinaryOperation, const property_list &PropList = {}) { template ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - accessor>, ext::oneapi::detail::default_reduction_algorithm< From edc843a784d9c97a73c15828e31e07786d77e98c Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 14:14:33 -0700 Subject: [PATCH 06/24] Switch uses to generic/template MRedOut --- sycl/include/sycl/ext/oneapi/reduction.hpp | 93 +++++++++------------- sycl/include/sycl/handler.hpp | 4 +- 2 files changed, 41 insertions(+), 56 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 0cd8333d6ccdd..a8a8bd2dd493f 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -538,11 +538,6 @@ class reduction_impl_algo< T, BinaryOperation, Dims, Extent, RedOutVar, default_reduction_algorithm>; -protected: - static constexpr bool my_is_usm = std::is_same_v; - static constexpr bool my_is_rw_acc = is_rw_acc_t::value; - static constexpr bool my_is_dw_acc = is_dw_acc_t::value; - public: using reducer_type = reducer; using result_type = T; @@ -568,7 +563,14 @@ class reduction_impl_algo< IsReduOptForFastAtomicFetch::value; static constexpr bool has_fast_reduce = IsReduOptForFastReduce::value; + static constexpr bool is_usm = IsUSM; + static constexpr bool my_is_usm = std::is_same_v; + static_assert(is_usm == my_is_usm); + + static constexpr bool my_is_rw_acc = is_rw_acc_t::value; + static constexpr bool my_is_dw_acc = is_dw_acc_t::value; + static constexpr bool is_placeholder = (IsPlaceholder == access::placeholder::true_t); @@ -597,10 +599,10 @@ class reduction_impl_algo< /// to keep the accessor alive until the command group finishes the work. /// This function does not do anything for USM reductions. void associateWithHandler(handler &CGH) { - if (MRWAcc) - CGH.associateWithHandler(MRWAcc.get(), access::target::device); - else if (MDWAcc) - CGH.associateWithHandler(MDWAcc.get(), access::target::device); + if constexpr (!my_is_usm) { + static_assert(my_is_rw_acc || my_is_dw_acc); + CGH.associateWithHandler(&MRedOut, access::target::device); + } } /// Creates and returns a local accessor with the \p Size elements. @@ -624,7 +626,7 @@ class reduction_impl_algo< template std::enable_if_t getWriteMemForPartialReds(size_t, handler &) { - return getUSMPointer(); + return getUserRedVar(); } /// Returns user's accessor passed to reduction for editing if that is @@ -633,8 +635,8 @@ class reduction_impl_algo< template std::enable_if_t getWriteMemForPartialReds(size_t, handler &CGH) { - if (MRWAcc) - return *MRWAcc; + if constexpr (my_is_rw_acc) + return MRedOut; return getWriteMemForPartialReds(1, CGH); } @@ -655,9 +657,11 @@ class reduction_impl_algo< /// Otherwise, a new buffer is created and accessor to that buffer is /// returned. rw_accessor_type getWriteAccForPartialReds(size_t Size, handler &CGH) { - if (Size == 1 && MRWAcc != nullptr) { - associateWithHandler(CGH); - return *MRWAcc; + if constexpr (my_is_rw_acc) { + if (Size == 1) { + associateWithHandler(CGH); + return MRedOut; + } } // Create a new output buffer and return an accessor to it. @@ -673,8 +677,12 @@ class reduction_impl_algo< template std::enable_if_t getReadWriteAccessorToInitializedMem(handler &CGH) { - if (!is_usm && !base::initializeToIdentity()) - return *MRWAcc; + if constexpr (my_is_rw_acc) { + if (!base::initializeToIdentity()) + return MRedOut; + } + assert(!(my_is_dw_acc && !base::initializeToIdentity()) && + "Unexpected condition!"); // TODO: Move to T[] in C++20 to simplify handling here // auto RWReduVal = std::make_shared(); @@ -701,22 +709,7 @@ class reduction_impl_algo< return {*CounterBuf, CGH}; } - bool hasUserDiscardWriteAccessor() { return MDWAcc != nullptr; } - - template - std::enable_if_t getUserReadWriteAccessor() { - return *MRWAcc; - } - - template - std::enable_if_t getUserDiscardWriteAccessor() { - return *MDWAcc; - } - - result_type *getUSMPointer() { - assert(is_usm && "Unexpected call of getUSMPointer()."); - return MUSMPointer; - } + RedOutVar &getUserRedVar() { return MRedOut; } static inline result_type *getOutPointer(const rw_accessor_type &OutAcc) { return OutAcc.get_pointer().get(); @@ -781,11 +774,11 @@ class reduction_impl using algo = reduction_impl_algo; +public: using algo::my_is_usm; using algo::my_is_rw_acc; using algo::my_is_dw_acc; -public: using reducer_type = typename algo::reducer_type; using rw_accessor_type = typename algo::rw_accessor_type; using dw_accessor_type = typename algo::dw_accessor_type; @@ -1746,10 +1739,7 @@ std::enable_if_t reduSaveFinalResultToUserMem(handler &CGH, Reduction &Redu) { auto InAcc = Redu.getReadAccToPreviousPartialReds(CGH); Redu.associateWithHandler(CGH); - if (Redu.hasUserDiscardWriteAccessor()) - CGH.copy(InAcc, Redu.getUserDiscardWriteAccessor()); - else - CGH.copy(InAcc, Redu.getUserReadWriteAccessor()); + CGH.copy(InAcc, Redu.getUserRedVar()); } // This method is used for implementation of parallel_for accepting 1 reduction. @@ -1762,7 +1752,7 @@ std::enable_if_t reduSaveFinalResultToUserMem(handler &CGH, Reduction &Redu) { constexpr size_t NElements = Reduction::num_elements; auto InAcc = Redu.getReadAccToPreviousPartialReds(CGH); - auto UserVarPtr = Redu.getUSMPointer(); + auto UserVarPtr = Redu.getUserRedVar(); bool IsUpdateOfUserVar = !Redu.initializeToIdentity(); auto BOp = Redu.getBinaryOperation(); CGH.single_task([=] { @@ -2559,21 +2549,16 @@ template void reduSaveFinalResultToUserMemHelper( std::vector &Events, std::shared_ptr Queue, bool IsHost, Reduction &Redu, RestT... Rest) { - // Reductions initialized with USM pointer currently do not require copying - // because the last kernel writes directly to the USM memory. - if constexpr (!Reduction::is_usm) { - if (Redu.hasUserDiscardWriteAccessor()) { - event CopyEvent = - withAuxHandler(Queue, IsHost, [&](handler &CopyHandler) { - auto InAcc = Redu.getReadAccToPreviousPartialReds(CopyHandler); - auto OutAcc = Redu.getUserDiscardWriteAccessor(); - Redu.associateWithHandler(CopyHandler); - if (!Events.empty()) - CopyHandler.depends_on(Events.back()); - CopyHandler.copy(InAcc, OutAcc); - }); - Events.push_back(CopyEvent); - } + if constexpr (Reduction::my_is_dw_acc) { + event CopyEvent = withAuxHandler(Queue, IsHost, [&](handler &CopyHandler) { + auto InAcc = Redu.getReadAccToPreviousPartialReds(CopyHandler); + auto OutAcc = Redu.getUserRedVar(); + Redu.associateWithHandler(CopyHandler); + if (!Events.empty()) + CopyHandler.depends_on(Events.back()); + CopyHandler.copy(InAcc, OutAcc); + }); + Events.push_back(CopyEvent); } reduSaveFinalResultToUserMemHelper(Events, Queue, IsHost, Rest...); } diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 20cf4b1f8c683..8d99605b699df 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -1655,7 +1655,7 @@ class __SYCL_EXPORT handler { *this, KernelFunc, Range, MaxWGSize, NumConcurrentWorkGroups, Redu); if (Reduction::is_usm || (Reduction::has_fast_atomics && Redu.initializeToIdentity()) || - (!Reduction::has_fast_atomics && Redu.hasUserDiscardWriteAccessor())) { + (!Reduction::has_fast_atomics && Reduction::my_is_dw_acc)) { this->finalize(); MLastEvent = withAuxHandler(QueueCopy, [&](handler &CopyHandler) { ext::oneapi::detail::reduSaveFinalResultToUserMem( @@ -1782,7 +1782,7 @@ class __SYCL_EXPORT handler { }); } // end while (NWorkItems > 1) - if (Reduction::is_usm || Redu.hasUserDiscardWriteAccessor()) { + if (Reduction::is_usm || Reduction::my_is_dw_acc) { MLastEvent = withAuxHandler(QueueCopy, [&](handler &CopyHandler) { ext::oneapi::detail::reduSaveFinalResultToUserMem( CopyHandler, Redu); From 445e8719047b57522b126cb86aef9c086a6000b6 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 14:26:32 -0700 Subject: [PATCH 07/24] Make MRedOut the *only* version of the user's reduction var In other words, remove separate MRWAcc/MDWAcc/MUSMPointer. --- sycl/include/sycl/ext/oneapi/reduction.hpp | 108 ++++----------------- 1 file changed, 17 insertions(+), 91 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index a8a8bd2dd493f..8e07bd5506ead 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -577,23 +577,9 @@ class reduction_impl_algo< static constexpr size_t dims = Dims; static constexpr size_t num_elements = Extent; - template * = nullptr> reduction_impl_algo(const T &Identity, BinaryOperation BinaryOp, bool Init, - std::shared_ptr AccPointer) - : base(Identity, BinaryOp, Init), MRWAcc(AccPointer), - MRedOut(*AccPointer){}; - template * = nullptr> - reduction_impl_algo(const T &Identity, BinaryOperation BinaryOp, bool Init, - std::shared_ptr AccPointer) - : base(Identity, BinaryOp, Init), MDWAcc(AccPointer), - MRedOut(*AccPointer){}; - template * = nullptr> - reduction_impl_algo(const T &Identity, BinaryOperation BinaryOp, bool Init, - T *USMPointer) - : base(Identity, BinaryOp, Init), MUSMPointer(USMPointer), - MRedOut(MUSMPointer){}; + RedOutVar RedOut) + : base(Identity, BinaryOp, Init), MRedOut(std::move(RedOut)){}; /// Associates the reduction accessor to user's memory with \p CGH handler /// to keep the accessor alive until the command group finishes the work. @@ -734,17 +720,10 @@ class reduction_impl_algo< return Acc; } - /// User's accessor to where the reduction must be written. - std::shared_ptr MRWAcc; - std::shared_ptr MDWAcc; - std::shared_ptr> MOutBufPtr; - /// USM pointer referencing the memory to where the result of the reduction - /// must be written. Applicable/used only for USM reductions. - T *MUSMPointer = nullptr; - - RedOutVar &MRedOut; + /// User's accessor/USM pointer to where the reduction must be written. + RedOutVar MRedOut; }; /// Predicate returning true if all template type parameters except the last one @@ -795,7 +774,7 @@ class reduction_impl reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH, bool InitializeToIdentity) : algo(reducer_type::getIdentity(), BinaryOperation(), - InitializeToIdentity, std::make_shared(Buffer)) { + InitializeToIdentity, rw_accessor_type{Buffer}) { algo::associateWithHandler(CGH); if (Buffer.size() != 1) throw sycl::runtime_error(errc::invalid, @@ -807,24 +786,9 @@ class reduction_impl template < typename _T = T, enable_if_t::value && - my_is_rw_acc> * = nullptr> - reduction_impl(rw_accessor_type &Acc) - : algo(reducer_type::getIdentity(), BinaryOperation(), false, - std::make_shared(Acc)) { - if (Acc.size() != 1) - throw sycl::runtime_error(errc::invalid, - "Reduction variable must be a scalar.", - PI_ERROR_INVALID_VALUE); - } - - /// Constructs reduction_impl when the identity value is statically known. - template < - typename _T = T, - enable_if_t::value && - my_is_dw_acc> * = nullptr> - reduction_impl(dw_accessor_type &Acc) - : algo(reducer_type::getIdentity(), BinaryOperation(), true, - std::make_shared(Acc)) { + (my_is_rw_acc || my_is_dw_acc)> * = nullptr> + reduction_impl(RedOutVar &Acc) + : algo(reducer_type::getIdentity(), BinaryOperation(), false, Acc) { if (Acc.size() != 1) throw sycl::runtime_error(errc::invalid, "Reduction variable must be a scalar.", @@ -842,7 +806,7 @@ class reduction_impl const T & /*Identity*/, BinaryOperation, bool InitializeToIdentity) : algo(reducer_type::getIdentity(), BinaryOperation(), - InitializeToIdentity, std::make_shared(Buffer)) { + InitializeToIdentity, rw_accessor_type{Buffer}) { algo::associateWithHandler(CGH); if (Buffer.size() != 1) throw sycl::runtime_error(errc::invalid, @@ -866,36 +830,10 @@ class reduction_impl template < typename _T = T, enable_if_t::value && - my_is_rw_acc> * = nullptr> - reduction_impl(rw_accessor_type &Acc, const T & /*Identity*/, BinaryOperation) - : algo(reducer_type::getIdentity(), BinaryOperation(), false, - std::make_shared(Acc)) { - if (Acc.size() != 1) - throw sycl::runtime_error(errc::invalid, - "Reduction variable must be a scalar.", - PI_ERROR_INVALID_VALUE); - // For now the implementation ignores the identity value given by user - // when the implementation knows the identity. - // The SPEC could prohibit passing identity parameter to operations with - // known identity, but that could have some bad consequences too. - // For example, at some moment the implementation may NOT know the identity - // for COMPLEX-PLUS reduction. User may create a program that would pass - // COMPLEX value (0,0) as identity for PLUS reduction. At some later moment - // when the implementation starts handling COMPLEX-PLUS as known operation - // the existing user's program remains compilable and working correctly. - // I.e. with this constructor here, adding more reduction operations to the - // list of known operations does not break the existing programs. - } - - /// Constructs reduction_impl when the identity value is statically known, - /// and user still passed the identity value. - template < - typename _T = T, - enable_if_t::value && - my_is_dw_acc> * = nullptr> - reduction_impl(dw_accessor_type &Acc, const T & /*Identity*/, BinaryOperation) - : algo(reducer_type::getIdentity(), BinaryOperation(), true, - std::make_shared(Acc)) { + (my_is_rw_acc || my_is_dw_acc)> * = nullptr> + reduction_impl(RedOutVar &Acc, const T & /*Identity*/, BinaryOperation) + : algo(reducer_type::getIdentity(), BinaryOperation(), my_is_dw_acc, + Acc) { if (Acc.size() != 1) throw sycl::runtime_error(errc::invalid, "Reduction variable must be a scalar.", @@ -923,7 +861,7 @@ class reduction_impl const T &Identity, BinaryOperation BOp, bool InitializeToIdentity) : algo(Identity, BOp, InitializeToIdentity, - std::make_shared(Buffer)) { + rw_accessor_type{Buffer}) { algo::associateWithHandler(CGH); if (Buffer.size() != 1) throw sycl::runtime_error(errc::invalid, @@ -934,21 +872,9 @@ class reduction_impl /// Constructs reduction_impl when the identity value is unknown. template ::value && - my_is_rw_acc> * = nullptr> - reduction_impl(rw_accessor_type &Acc, const T &Identity, BinaryOperation BOp) - : algo(Identity, BOp, false, std::make_shared(Acc)) { - if (Acc.size() != 1) - throw sycl::runtime_error(errc::invalid, - "Reduction variable must be a scalar.", - PI_ERROR_INVALID_VALUE); - } - - /// Constructs reduction_impl when the identity value is unknown. - template ::value && - my_is_dw_acc> * = nullptr> - reduction_impl(dw_accessor_type &Acc, const T &Identity, BinaryOperation BOp) - : algo(Identity, BOp, true, std::make_shared(Acc)) { + (my_is_rw_acc || my_is_dw_acc)> * = nullptr> + reduction_impl(RedOutVar &Acc, const T &Identity, BinaryOperation BOp) + : algo(Identity, BOp, my_is_dw_acc, Acc) { if (Acc.size() != 1) throw sycl::runtime_error(errc::invalid, "Reduction variable must be a scalar.", From 3a4d91d62cef0a4230b2d1932b39238f65ed54a9 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 15:34:55 -0700 Subject: [PATCH 08/24] Simplification of overloads with/without known identity --- sycl/include/sycl/ext/oneapi/reduction.hpp | 175 ++++++--------------- 1 file changed, 50 insertions(+), 125 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 8e07bd5506ead..8c99aefc2bc16 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -752,6 +752,32 @@ class reduction_impl private: using algo = reduction_impl_algo; + using self = reduction_impl; + + static constexpr bool is_known_identity = + sycl::detail::IsKnownIdentityOp::value; + + // TODO: Do we also need chooseBinOp? + static constexpr T chooseIdentity(const T &Identity) { + // For now the implementation ignores the identity value given by user + // when the implementation knows the identity. + // The SPEC could prohibit passing identity parameter to operations with + // known identity, but that could have some bad consequences too. + // For example, at some moment the implementation may NOT know the identity + // for COMPLEX-PLUS reduction. User may create a program that would pass + // COMPLEX value (0,0) as identity for PLUS reduction. At some later moment + // when the implementation starts handling COMPLEX-PLUS as known operation + // the existing user's program remains compilable and working correctly. + // I.e. with this constructor here, adding more reduction operations to the + // list of known operations does not break the existing programs. + if constexpr (is_known_identity) { + (void)Identity; + return reducer_type::getIdentity(); + + } else { + return Identity; + } + } public: using algo::my_is_usm; @@ -767,11 +793,10 @@ class reduction_impl /// SYCL-2020. /// Constructs reduction_impl when the identity value is statically known. - template ::value && - my_is_rw_acc> * = nullptr> - reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH, + template + * = nullptr> + reduction_impl(buffer Buffer, handler &CGH, bool InitializeToIdentity) : algo(reducer_type::getIdentity(), BinaryOperation(), InitializeToIdentity, rw_accessor_type{Buffer}) { @@ -783,10 +808,9 @@ class reduction_impl } /// Constructs reduction_impl when the identity value is statically known. - template < - typename _T = T, - enable_if_t::value && - (my_is_rw_acc || my_is_dw_acc)> * = nullptr> + template * = nullptr> reduction_impl(RedOutVar &Acc) : algo(reducer_type::getIdentity(), BinaryOperation(), false, Acc) { if (Acc.size() != 1) @@ -798,69 +822,14 @@ class reduction_impl /// SYCL-2020. /// Constructs reduction_impl when the identity value is statically known, /// and user still passed the identity value. - template < - typename _T, typename AllocatorT, - enable_if_t::value && - my_is_rw_acc> * = nullptr> - reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH, - const T & /*Identity*/, BinaryOperation, - bool InitializeToIdentity) - : algo(reducer_type::getIdentity(), BinaryOperation(), - InitializeToIdentity, rw_accessor_type{Buffer}) { - algo::associateWithHandler(CGH); - if (Buffer.size() != 1) - throw sycl::runtime_error(errc::invalid, - "Reduction variable must be a scalar.", - PI_ERROR_INVALID_VALUE); - // For now the implementation ignores the identity value given by user - // when the implementation knows the identity. - // The SPEC could prohibit passing identity parameter to operations with - // known identity, but that could have some bad consequences too. - // For example, at some moment the implementation may NOT know the identity - // for COMPLEX-PLUS reduction. User may create a program that would pass - // COMPLEX value (0,0) as identity for PLUS reduction. At some later moment - // when the implementation starts handling COMPLEX-PLUS as known operation - // the existing user's program remains compilable and working correctly. - // I.e. with this constructor here, adding more reduction operations to the - // list of known operations does not break the existing programs. - } - - /// Constructs reduction_impl when the identity value is statically known, - /// and user still passed the identity value. - template < - typename _T = T, - enable_if_t::value && - (my_is_rw_acc || my_is_dw_acc)> * = nullptr> - reduction_impl(RedOutVar &Acc, const T & /*Identity*/, BinaryOperation) - : algo(reducer_type::getIdentity(), BinaryOperation(), my_is_dw_acc, - Acc) { - if (Acc.size() != 1) - throw sycl::runtime_error(errc::invalid, - "Reduction variable must be a scalar.", - PI_ERROR_INVALID_VALUE); - // For now the implementation ignores the identity value given by user - // when the implementation knows the identity. - // The SPEC could prohibit passing identity parameter to operations with - // known identity, but that could have some bad consequences too. - // For example, at some moment the implementation may NOT know the identity - // for COMPLEX-PLUS reduction. User may create a program that would pass - // COMPLEX value (0,0) as identity for PLUS reduction. At some later moment - // when the implementation starts handling COMPLEX-PLUS as known operation - // the existing user's program remains compilable and working correctly. - // I.e. with this constructor here, adding more reduction operations to the - // list of known operations does not break the existing programs. - } - /// SYCL-2020. /// Constructs reduction_impl when the identity value is NOT known statically. - template ::value && - my_is_rw_acc> * = nullptr> - reduction_impl(buffer<_T, 1, AllocatorT> Buffer, handler &CGH, + template * = nullptr> + reduction_impl(buffer Buffer, handler &CGH, const T &Identity, BinaryOperation BOp, bool InitializeToIdentity) - : algo(Identity, BOp, InitializeToIdentity, + : algo(chooseIdentity(Identity), BOp, InitializeToIdentity, rw_accessor_type{Buffer}) { algo::associateWithHandler(CGH); if (Buffer.size() != 1) @@ -870,11 +839,10 @@ class reduction_impl } /// Constructs reduction_impl when the identity value is unknown. - template ::value && - (my_is_rw_acc || my_is_dw_acc)> * = nullptr> + template * = nullptr> reduction_impl(RedOutVar &Acc, const T &Identity, BinaryOperation BOp) - : algo(Identity, BOp, my_is_dw_acc, Acc) { + : algo(chooseIdentity(Identity), BOp, my_is_dw_acc, Acc) { if (Acc.size() != 1) throw sycl::runtime_error(errc::invalid, "Reduction variable must be a scalar.", @@ -885,75 +853,32 @@ class reduction_impl /// The \param VarPtr is a USM pointer to memory, to where the computed /// reduction value is added using BinaryOperation, i.e. it is expected that /// the memory is pre-initialized with some meaningful value. - template < - typename _T = T, - enable_if_t::value && - my_is_usm> * = nullptr> + template * = nullptr> reduction_impl(T *VarPtr, bool InitializeToIdentity = false) : algo(reducer_type::getIdentity(), BinaryOperation(), InitializeToIdentity, VarPtr) {} - /// Constructs reduction_impl when the identity value is statically known, - /// and user still passed the identity value. /// The \param VarPtr is a USM pointer to memory, to where the computed /// reduction value is added using BinaryOperation, i.e. it is expected that /// the memory is pre-initialized with some meaningful value. - template < - typename _T = T, - enable_if_t::value && - my_is_usm> * = nullptr> - reduction_impl(T *VarPtr, const T &Identity, BinaryOperation, - bool InitializeToIdentity = false) - : algo(Identity, BinaryOperation(), InitializeToIdentity, VarPtr) { - // For now the implementation ignores the identity value given by user - // when the implementation knows the identity. - // The SPEC could prohibit passing identity parameter to operations with - // known identity, but that could have some bad consequences too. - // For example, at some moment the implementation may NOT know the identity - // for COMPLEX-PLUS reduction. User may create a program that would pass - // COMPLEX value (0,0) as identity for PLUS reduction. At some later moment - // when the implementation starts handling COMPLEX-PLUS as known operation - // the existing user's program remains compilable and working correctly. - // I.e. with this constructor here, adding more reduction operations to the - // list of known operations does not break the existing programs. - } - - /// Constructs reduction_impl when the identity value is unknown. - /// The \param VarPtr is a USM pointer to memory, to where the computed - /// reduction value is added using BinaryOperation, i.e. it is expected that - /// the memory is pre-initialized with some meaningful value. - template ::value && - my_is_usm> * = nullptr> + template * = nullptr> reduction_impl(T *VarPtr, const T &Identity, BinaryOperation BOp, bool InitializeToIdentity = false) - : algo(Identity, BOp, InitializeToIdentity, VarPtr) {} + : algo(chooseIdentity(Identity), BOp, InitializeToIdentity, VarPtr) {} /// Constructs reduction_impl when the identity value is statically known - template ::value> * = nullptr> - reduction_impl(span<_T, Extent> Span, bool InitializeToIdentity = false) + template * = nullptr> + reduction_impl(span Span, bool InitializeToIdentity = false) : algo(reducer_type::getIdentity(), BinaryOperation(), InitializeToIdentity, Span.data()) {} - /// Constructs reduction_impl when the identity value is statically known - /// and user passed an identity value anyway - template < - typename _T = T, - enable_if_t::value && - my_is_usm> * = nullptr> - reduction_impl(span<_T, Extent> Span, const T & /* Identity */, - BinaryOperation BOp, bool InitializeToIdentity = false) - : algo(reducer_type::getIdentity(), BOp, InitializeToIdentity, - Span.data()) {} - - /// Constructs reduction_impl when the identity value is not statically known - template ::value && - my_is_usm> * = nullptr> + template * = nullptr> reduction_impl(span Span, const T &Identity, BinaryOperation BOp, bool InitializeToIdentity = false) - : algo(Identity, BOp, InitializeToIdentity, Span.data()) {} + : algo(chooseIdentity(Identity), BOp, InitializeToIdentity, Span.data()) { + } }; /// A helper to pass undefined (sycl::detail::auto_name) names unmodified. We From 862a4249bae6d3f3071cf900ed81d03f626ccc7d Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 16:07:31 -0700 Subject: [PATCH 09/24] Move RedOutVar to the end of the template params list I hoped that I'll be able to deduce it from arguments, but partial CTAD doesn't exist, unfortunately. --- sycl/include/sycl/ext/oneapi/reduction.hpp | 46 +++++++++++---------- sycl/include/sycl/handler.hpp | 4 +- sycl/include/sycl/reduction.hpp | 47 ++++++++++++---------- 3 files changed, 53 insertions(+), 44 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 8c99aefc2bc16..61a7dbf78906c 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -521,22 +521,23 @@ class default_reduction_algorithm {}; /// Templated class for implementations of specific reduction algorithms template + class Algorithm, typename RedOutVar> class reduction_impl_algo; /// Original reduction algorithm is the default. It supports both USM and /// accessors via a single class template + bool IsUSM, access::placeholder IsPlaceholder, int AccessorDims, + typename RedOutVar> class reduction_impl_algo< - T, BinaryOperation, Dims, Extent, RedOutVar, - default_reduction_algorithm> + T, BinaryOperation, Dims, Extent, + default_reduction_algorithm, RedOutVar> : public reduction_impl_common { using base = reduction_impl_common; using self = reduction_impl_algo< - T, BinaryOperation, Dims, Extent, RedOutVar, - default_reduction_algorithm>; + T, BinaryOperation, Dims, Extent, + default_reduction_algorithm, + RedOutVar>; public: using reducer_type = reducer; @@ -744,15 +745,16 @@ template struct AreAllButLastReductions { /// This class encapsulates the reduction variable/accessor, /// the reduction operator and an optional operator identity. template + class Algorithm, typename RedOutVar> class reduction_impl : private reduction_impl_base, - public reduction_impl_algo { + public reduction_impl_algo { private: - using algo = reduction_impl_algo; - using self = reduction_impl; + using algo = reduction_impl_algo; + using self = + reduction_impl; static constexpr bool is_known_identity = sycl::detail::IsKnownIdentityOp::value; @@ -2464,8 +2466,8 @@ tuple_select_elements(TupleT Tuple, std::index_sequence) { template detail::reduction_impl, - detail::default_reduction_algorithm> + detail::default_reduction_algorithm, + accessor> reduction(accessor &Acc, const T &Identity, BinaryOperation BOp) { return {Acc, Identity, BOp}; @@ -2480,8 +2482,8 @@ template ::value, detail::reduction_impl< T, BinaryOperation, 0, 1, - accessor, - detail::default_reduction_algorithm>> + detail::default_reduction_algorithm, + accessor>> reduction(accessor &Acc, BinaryOperation) { return {Acc}; @@ -2493,8 +2495,9 @@ reduction(accessor &Acc, /// \param Identity, and the binary operation used in the reduction. template detail::reduction_impl< - T, BinaryOperation, 0, 1, T *, - detail::default_reduction_algorithm> + T, BinaryOperation, 0, 1, + detail::default_reduction_algorithm, + T *> reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { return {VarPtr, Identity, BOp}; } @@ -2507,9 +2510,10 @@ reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { template std::enable_if_t< sycl::detail::IsKnownIdentityOp::value, - detail::reduction_impl>> + true, access::placeholder::false_t, 1>, + T *>> reduction(T *VarPtr, BinaryOperation) { return {VarPtr}; } diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 8d99605b699df..220c2ea1122f3 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -242,7 +242,7 @@ namespace ext { namespace oneapi { namespace detail { template + class Algorithm, typename RedOutVar> class reduction_impl_algo; using cl::sycl::detail::enable_if_t; @@ -2667,7 +2667,7 @@ class __SYCL_EXPORT handler { // Make reduction friends to store buffers and arrays created for it // in handler from reduction methods. template + class Algorithm, typename RedOutVar> friend class ext::oneapi::detail::reduction_impl_algo; #ifndef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index bb8cb269980c6..de9635e164356 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -25,11 +25,11 @@ std::enable_if_t< has_known_identity::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, + ext::oneapi::detail::default_reduction_algorithm< + false, access::placeholder::true_t, 1>, accessor>, - ext::oneapi::detail::default_reduction_algorithm< - false, access::placeholder::true_t, 1>>> + ext::oneapi::accessor_property_list<>>>> reduction(buffer Var, handler &CGH, BinaryOperation, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -46,11 +46,11 @@ std::enable_if_t< !has_known_identity::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, + ext::oneapi::detail::default_reduction_algorithm< + false, access::placeholder::true_t, 1>, accessor>, - ext::oneapi::detail::default_reduction_algorithm< - false, access::placeholder::true_t, 1>>> + ext::oneapi::accessor_property_list<>>>> reduction(buffer, handler &, BinaryOperation, const property_list &PropList = {}) { // TODO: implement reduction that works even when identity is not known. @@ -66,9 +66,10 @@ reduction(buffer, handler &, BinaryOperation, template std::enable_if_t::value, ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, T *, + T, BinaryOperation, 0, 1, ext::oneapi::detail::default_reduction_algorithm< - true, access::placeholder::false_t, 1>>> + true, access::placeholder::false_t, 1>, + T *>> reduction(T *Var, BinaryOperation, const property_list &PropList = {}) { bool InitializeToIdentity = PropList.has_property(); @@ -83,9 +84,10 @@ reduction(T *Var, BinaryOperation, const property_list &PropList = {}) { template std::enable_if_t::value, ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, T *, + T, BinaryOperation, 0, 1, ext::oneapi::detail::default_reduction_algorithm< - true, access::placeholder::false_t, 1>>> + true, access::placeholder::false_t, 1>, + T *>> reduction(T *, BinaryOperation, const property_list &PropList = {}) { // TODO: implement reduction that works even when identity is not known. (void)PropList; @@ -100,11 +102,11 @@ reduction(T *, BinaryOperation, const property_list &PropList = {}) { template ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, + ext::oneapi::detail::default_reduction_algorithm< + false, access::placeholder::true_t, 1>, accessor>, - ext::oneapi::detail::default_reduction_algorithm< - false, access::placeholder::true_t, 1>> + ext::oneapi::accessor_property_list<>>> reduction(buffer Var, handler &CGH, const T &Identity, BinaryOperation Combiner, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -117,9 +119,10 @@ reduction(buffer Var, handler &CGH, const T &Identity, /// binary operation \p Combiner, and optional reduction properties. template ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, T *, + T, BinaryOperation, 0, 1, ext::oneapi::detail::default_reduction_algorithm< - true, access::placeholder::false_t, 1>> + true, access::placeholder::false_t, 1>, + T *> reduction(T *Var, const T &Identity, BinaryOperation Combiner, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -135,9 +138,9 @@ template std::enable_if_t::value, ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 1, Extent, T *, + T, BinaryOperation, 1, Extent, ext::oneapi::detail::default_reduction_algorithm< - true, access::placeholder::false_t, 1>>> + true, access::placeholder::false_t, 1>, T *>> reduction(span Span, BinaryOperation, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -154,9 +157,10 @@ template std::enable_if_t::value, ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 1, Extent, T *, + T, BinaryOperation, 1, Extent, ext::oneapi::detail::default_reduction_algorithm< - true, access::placeholder::false_t, 1>>> + true, access::placeholder::false_t, 1>, + T *>> reduction(span, BinaryOperation, const property_list &PropList = {}) { // TODO: implement reduction that works even when identity is not known. @@ -172,9 +176,10 @@ reduction(span, BinaryOperation, template std::enable_if_t>> + true, access::placeholder::false_t, 1>, + T *>> reduction(span Span, const T &Identity, BinaryOperation Combiner, const property_list &PropList = {}) { bool InitializeToIdentity = From 96502df04f29467ab890f21a7bf50c718261fa44 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 16:43:16 -0700 Subject: [PATCH 10/24] Remove IsUSM from default_reduction_algorithm --- sycl/include/sycl/ext/oneapi/reduction.hpp | 30 ++++++++++------------ sycl/include/sycl/reduction.hpp | 19 +++++++------- 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 61a7dbf78906c..f7e8f598d8476 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -516,7 +516,7 @@ struct is_dw_acc_t +template class default_reduction_algorithm {}; /// Templated class for implementations of specific reduction algorithms @@ -527,17 +527,16 @@ class reduction_impl_algo; /// Original reduction algorithm is the default. It supports both USM and /// accessors via a single class template class reduction_impl_algo< T, BinaryOperation, Dims, Extent, - default_reduction_algorithm, RedOutVar> + default_reduction_algorithm, RedOutVar> : public reduction_impl_common { using base = reduction_impl_common; using self = reduction_impl_algo< T, BinaryOperation, Dims, Extent, - default_reduction_algorithm, - RedOutVar>; + default_reduction_algorithm, RedOutVar>; public: using reducer_type = reducer; @@ -565,9 +564,8 @@ class reduction_impl_algo< static constexpr bool has_fast_reduce = IsReduOptForFastReduce::value; - static constexpr bool is_usm = IsUSM; static constexpr bool my_is_usm = std::is_same_v; - static_assert(is_usm == my_is_usm); + static constexpr bool is_usm = my_is_usm; static constexpr bool my_is_rw_acc = is_rw_acc_t::value; static constexpr bool my_is_dw_acc = is_dw_acc_t::value; @@ -2466,7 +2464,7 @@ tuple_select_elements(TupleT Tuple, std::index_sequence) { template detail::reduction_impl, + detail::default_reduction_algorithm, accessor> reduction(accessor &Acc, const T &Identity, BinaryOperation BOp) { @@ -2482,7 +2480,7 @@ template ::value, detail::reduction_impl< T, BinaryOperation, 0, 1, - detail::default_reduction_algorithm, + detail::default_reduction_algorithm, accessor>> reduction(accessor &Acc, BinaryOperation) { @@ -2496,8 +2494,7 @@ reduction(accessor &Acc, template detail::reduction_impl< T, BinaryOperation, 0, 1, - detail::default_reduction_algorithm, - T *> + detail::default_reduction_algorithm, T *> reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { return {VarPtr, Identity, BOp}; } @@ -2508,12 +2505,11 @@ reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { /// operation used in the reduction. /// The identity value is not passed to this version as it is statically known. template -std::enable_if_t< - sycl::detail::IsKnownIdentityOp::value, - detail::reduction_impl, - T *>> +std::enable_if_t::value, + detail::reduction_impl, + T *>> reduction(T *VarPtr, BinaryOperation) { return {VarPtr}; } diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index de9635e164356..340947e049e36 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -26,7 +26,7 @@ std::enable_if_t< ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, ext::oneapi::detail::default_reduction_algorithm< - false, access::placeholder::true_t, 1>, + access::placeholder::true_t, 1>, accessor>>> @@ -47,7 +47,7 @@ std::enable_if_t< ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, ext::oneapi::detail::default_reduction_algorithm< - false, access::placeholder::true_t, 1>, + access::placeholder::true_t, 1>, accessor>>> @@ -68,7 +68,7 @@ std::enable_if_t::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, ext::oneapi::detail::default_reduction_algorithm< - true, access::placeholder::false_t, 1>, + access::placeholder::false_t, 1>, T *>> reduction(T *Var, BinaryOperation, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -86,7 +86,7 @@ std::enable_if_t::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, ext::oneapi::detail::default_reduction_algorithm< - true, access::placeholder::false_t, 1>, + access::placeholder::false_t, 1>, T *>> reduction(T *, BinaryOperation, const property_list &PropList = {}) { // TODO: implement reduction that works even when identity is not known. @@ -103,7 +103,7 @@ template ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, ext::oneapi::detail::default_reduction_algorithm< - false, access::placeholder::true_t, 1>, + access::placeholder::true_t, 1>, accessor>> @@ -121,7 +121,7 @@ template ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, ext::oneapi::detail::default_reduction_algorithm< - true, access::placeholder::false_t, 1>, + access::placeholder::false_t, 1>, T *> reduction(T *Var, const T &Identity, BinaryOperation Combiner, const property_list &PropList = {}) { @@ -140,7 +140,8 @@ std::enable_if_t, T *>> + access::placeholder::false_t, 1>, + T *>> reduction(span Span, BinaryOperation, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -159,7 +160,7 @@ std::enable_if_t, + access::placeholder::false_t, 1>, T *>> reduction(span, BinaryOperation, const property_list &PropList = {}) { @@ -178,7 +179,7 @@ std::enable_if_t, + access::placeholder::false_t, 1>, T *>> reduction(span Span, const T &Identity, BinaryOperation Combiner, const property_list &PropList = {}) { From 252a71c262a08b529e37517cfe25077354dd5238 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 17:21:11 -0700 Subject: [PATCH 11/24] Remove IsPlaceholder from default_reduction_algorithm --- sycl/include/sycl/ext/oneapi/reduction.hpp | 78 +++++++++++----------- sycl/include/sycl/reduction.hpp | 33 +++------ 2 files changed, 48 insertions(+), 63 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index f7e8f598d8476..15cc1be9684ad 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -499,24 +499,32 @@ template class reduction_impl_common { template struct is_rw_acc_t : public std::false_type {}; -template -struct is_rw_acc_t< - accessor>> +template +struct is_rw_acc_t> : public std::true_type {}; template struct is_dw_acc_t : public std::false_type {}; -template +template struct is_dw_acc_t>> + access::target::device, IsPlaceholder, PropList>> + : public std::true_type {}; + +template +struct is_placeholder_t : public std::false_type {}; + +template +struct is_placeholder_t> : public std::true_type {}; /// Types representing specific reduction algorithms /// Enables reduction_impl_algo to take additional algorithm-specific templates -template +template class default_reduction_algorithm {}; /// Templated class for implementations of specific reduction algorithms @@ -527,16 +535,15 @@ class reduction_impl_algo; /// Original reduction algorithm is the default. It supports both USM and /// accessors via a single class template + int AccessorDims, typename RedOutVar> class reduction_impl_algo< T, BinaryOperation, Dims, Extent, - default_reduction_algorithm, RedOutVar> + default_reduction_algorithm, RedOutVar> : public reduction_impl_common { using base = reduction_impl_common; - using self = reduction_impl_algo< - T, BinaryOperation, Dims, Extent, - default_reduction_algorithm, RedOutVar>; + using self = + reduction_impl_algo, RedOutVar>; public: using reducer_type = reducer; @@ -548,15 +555,12 @@ class reduction_impl_algo< // AccessorDims also determines the dimensionality of some temp storage static constexpr int accessor_dim = AccessorDims; static constexpr int buffer_dim = (AccessorDims == 0) ? 1 : AccessorDims; + static constexpr access::placeholder is_placeholder = + is_placeholder_t::value ? access::placeholder::true_t + : access::placeholder::false_t; using rw_accessor_type = accessor>; - using dw_accessor_type = - accessor>; - - static constexpr bool has_atomic_add_float64 = IsReduOptForAtomic64Add::value; static constexpr bool has_fast_atomics = @@ -570,9 +574,6 @@ class reduction_impl_algo< static constexpr bool my_is_rw_acc = is_rw_acc_t::value; static constexpr bool my_is_dw_acc = is_dw_acc_t::value; - static constexpr bool is_placeholder = - (IsPlaceholder == access::placeholder::true_t); - static constexpr size_t dims = Dims; static constexpr size_t num_elements = Extent; @@ -705,14 +706,16 @@ class reduction_impl_algo< } private: - template - std::enable_if_t + template + std::enable_if_t<_self::is_placeholder == access::placeholder::false_t, + rw_accessor_type> createHandlerWiredReadWriteAccessor(handler &CGH, BufferT Buffer) { return {Buffer, CGH}; } - template - std::enable_if_t + template + std::enable_if_t<_self::is_placeholder == access::placeholder::true_t, + rw_accessor_type> createHandlerWiredReadWriteAccessor(handler &CGH, BufferT Buffer) { rw_accessor_type Acc(Buffer); CGH.require(Acc); @@ -786,7 +789,6 @@ class reduction_impl using reducer_type = typename algo::reducer_type; using rw_accessor_type = typename algo::rw_accessor_type; - using dw_accessor_type = typename algo::dw_accessor_type; // Only scalar and 1D array reductions are supported by SYCL 2020. static_assert(Dims <= 1, "Multi-dimensional reductions are not supported."); @@ -2464,7 +2466,7 @@ tuple_select_elements(TupleT Tuple, std::index_sequence) { template detail::reduction_impl, + detail::default_reduction_algorithm, accessor> reduction(accessor &Acc, const T &Identity, BinaryOperation BOp) { @@ -2480,7 +2482,7 @@ template ::value, detail::reduction_impl< T, BinaryOperation, 0, 1, - detail::default_reduction_algorithm, + detail::default_reduction_algorithm, accessor>> reduction(accessor &Acc, BinaryOperation) { @@ -2492,9 +2494,8 @@ reduction(accessor &Acc, /// the computed reduction must be stored \param VarPtr, identity value /// \param Identity, and the binary operation used in the reduction. template -detail::reduction_impl< - T, BinaryOperation, 0, 1, - detail::default_reduction_algorithm, T *> +detail::reduction_impl, T *> reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { return {VarPtr, Identity, BOp}; } @@ -2505,11 +2506,10 @@ reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { /// operation used in the reduction. /// The identity value is not passed to this version as it is statically known. template -std::enable_if_t::value, - detail::reduction_impl, - T *>> +std::enable_if_t< + sycl::detail::IsKnownIdentityOp::value, + detail::reduction_impl, T *>> reduction(T *VarPtr, BinaryOperation) { return {VarPtr}; } diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 340947e049e36..0057c12cd31e9 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -25,8 +25,7 @@ std::enable_if_t< has_known_identity::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm< - access::placeholder::true_t, 1>, + ext::oneapi::detail::default_reduction_algorithm<1>, accessor>>> @@ -46,8 +45,7 @@ std::enable_if_t< !has_known_identity::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm< - access::placeholder::true_t, 1>, + ext::oneapi::detail::default_reduction_algorithm<1>, accessor>>> @@ -67,9 +65,7 @@ template std::enable_if_t::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm< - access::placeholder::false_t, 1>, - T *>> + ext::oneapi::detail::default_reduction_algorithm<1>, T *>> reduction(T *Var, BinaryOperation, const property_list &PropList = {}) { bool InitializeToIdentity = PropList.has_property(); @@ -85,9 +81,7 @@ template std::enable_if_t::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm< - access::placeholder::false_t, 1>, - T *>> + ext::oneapi::detail::default_reduction_algorithm<1>, T *>> reduction(T *, BinaryOperation, const property_list &PropList = {}) { // TODO: implement reduction that works even when identity is not known. (void)PropList; @@ -102,8 +96,7 @@ reduction(T *, BinaryOperation, const property_list &PropList = {}) { template ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm< - access::placeholder::true_t, 1>, + ext::oneapi::detail::default_reduction_algorithm<1>, accessor>> @@ -120,9 +113,7 @@ reduction(buffer Var, handler &CGH, const T &Identity, template ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm< - access::placeholder::false_t, 1>, - T *> + ext::oneapi::detail::default_reduction_algorithm<1>, T *> reduction(T *Var, const T &Identity, BinaryOperation Combiner, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -139,9 +130,7 @@ std::enable_if_t::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 1, Extent, - ext::oneapi::detail::default_reduction_algorithm< - access::placeholder::false_t, 1>, - T *>> + ext::oneapi::detail::default_reduction_algorithm<1>, T *>> reduction(span Span, BinaryOperation, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -159,9 +148,7 @@ std::enable_if_t::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 1, Extent, - ext::oneapi::detail::default_reduction_algorithm< - access::placeholder::false_t, 1>, - T *>> + ext::oneapi::detail::default_reduction_algorithm<1>, T *>> reduction(span, BinaryOperation, const property_list &PropList = {}) { // TODO: implement reduction that works even when identity is not known. @@ -178,9 +165,7 @@ template std::enable_if_t, - T *>> + ext::oneapi::detail::default_reduction_algorithm<1>, T *>> reduction(span Span, const T &Identity, BinaryOperation Combiner, const property_list &PropList = {}) { bool InitializeToIdentity = From c15825bb900cd4aa154b77f9975cf6ed611d8b46 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 17:41:14 -0700 Subject: [PATCH 12/24] Remove default_reduction_algorithm --- sycl/include/sycl/ext/oneapi/reduction.hpp | 60 +++++++++------------- sycl/include/sycl/handler.hpp | 4 +- sycl/include/sycl/reduction.hpp | 44 ++++++---------- 3 files changed, 42 insertions(+), 66 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index 15cc1be9684ad..f926969a1508f 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -522,28 +522,24 @@ struct is_placeholder_t> : public std::true_type {}; -/// Types representing specific reduction algorithms -/// Enables reduction_impl_algo to take additional algorithm-specific templates -template -class default_reduction_algorithm {}; -/// Templated class for implementations of specific reduction algorithms -template -class reduction_impl_algo; +template +struct accessor_dim_t { + static constexpr int value = 1; +}; + +template +struct accessor_dim_t< + accessor> { + static constexpr int value = AccessorDims; +}; -/// Original reduction algorithm is the default. It supports both USM and -/// accessors via a single class template -class reduction_impl_algo< - T, BinaryOperation, Dims, Extent, - default_reduction_algorithm, RedOutVar> - : public reduction_impl_common { + typename RedOutVar> +class reduction_impl_algo : public reduction_impl_common { using base = reduction_impl_common; - using self = - reduction_impl_algo, RedOutVar>; + using self = reduction_impl_algo; public: using reducer_type = reducer; @@ -553,12 +549,12 @@ class reduction_impl_algo< // Buffers and accessors always describe scalar reductions (i.e. Dims == 0) // The input buffer/accessor is allowed to have different dimensionality // AccessorDims also determines the dimensionality of some temp storage - static constexpr int accessor_dim = AccessorDims; - static constexpr int buffer_dim = (AccessorDims == 0) ? 1 : AccessorDims; + static constexpr int accessor_dim = accessor_dim_t::value; + static constexpr int buffer_dim = (accessor_dim == 0) ? 1 : accessor_dim; static constexpr access::placeholder is_placeholder = is_placeholder_t::value ? access::placeholder::true_t : access::placeholder::false_t; - using rw_accessor_type = accessor>; static constexpr bool has_atomic_add_float64 = @@ -746,16 +742,13 @@ template struct AreAllButLastReductions { /// This class encapsulates the reduction variable/accessor, /// the reduction operator and an optional operator identity. template + typename RedOutVar> class reduction_impl : private reduction_impl_base, - public reduction_impl_algo { + public reduction_impl_algo { private: - using algo = reduction_impl_algo; - using self = - reduction_impl; + using algo = reduction_impl_algo; + using self = reduction_impl; static constexpr bool is_known_identity = sycl::detail::IsKnownIdentityOp::value; @@ -2466,7 +2459,6 @@ tuple_select_elements(TupleT Tuple, std::index_sequence) { template detail::reduction_impl, accessor> reduction(accessor &Acc, const T &Identity, BinaryOperation BOp) { @@ -2482,7 +2474,6 @@ template ::value, detail::reduction_impl< T, BinaryOperation, 0, 1, - detail::default_reduction_algorithm, accessor>> reduction(accessor &Acc, BinaryOperation) { @@ -2494,8 +2485,7 @@ reduction(accessor &Acc, /// the computed reduction must be stored \param VarPtr, identity value /// \param Identity, and the binary operation used in the reduction. template -detail::reduction_impl, T *> +detail::reduction_impl reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { return {VarPtr, Identity, BOp}; } @@ -2506,10 +2496,8 @@ reduction(T *VarPtr, const T &Identity, BinaryOperation BOp) { /// operation used in the reduction. /// The identity value is not passed to this version as it is statically known. template -std::enable_if_t< - sycl::detail::IsKnownIdentityOp::value, - detail::reduction_impl, T *>> +std::enable_if_t::value, + detail::reduction_impl> reduction(T *VarPtr, BinaryOperation) { return {VarPtr}; } diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 220c2ea1122f3..789a6c94aba84 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -242,7 +242,7 @@ namespace ext { namespace oneapi { namespace detail { template + typename RedOutVar> class reduction_impl_algo; using cl::sycl::detail::enable_if_t; @@ -2667,7 +2667,7 @@ class __SYCL_EXPORT handler { // Make reduction friends to store buffers and arrays created for it // in handler from reduction methods. template + typename RedOutVar> friend class ext::oneapi::detail::reduction_impl_algo; #ifndef __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/reduction.hpp b/sycl/include/sycl/reduction.hpp index 0057c12cd31e9..e2b4facd83142 100644 --- a/sycl/include/sycl/reduction.hpp +++ b/sycl/include/sycl/reduction.hpp @@ -25,7 +25,6 @@ std::enable_if_t< has_known_identity::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm<1>, accessor>>> @@ -45,7 +44,6 @@ std::enable_if_t< !has_known_identity::value, ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm<1>, accessor>>> @@ -62,10 +60,9 @@ reduction(buffer, handler &, BinaryOperation, /// the given USM pointer \p Var, handler \p CGH, reduction operation /// \p Combiner, and optional reduction properties. template -std::enable_if_t::value, - ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm<1>, T *>> +std::enable_if_t< + has_known_identity::value, + ext::oneapi::detail::reduction_impl> reduction(T *Var, BinaryOperation, const property_list &PropList = {}) { bool InitializeToIdentity = PropList.has_property(); @@ -78,10 +75,9 @@ reduction(T *Var, BinaryOperation, const property_list &PropList = {}) { /// The reduction algorithm may be less efficient for this variant as the /// reduction identity is not known statically and it is not provided by user. template -std::enable_if_t::value, - ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm<1>, T *>> +std::enable_if_t< + !has_known_identity::value, + ext::oneapi::detail::reduction_impl> reduction(T *, BinaryOperation, const property_list &PropList = {}) { // TODO: implement reduction that works even when identity is not known. (void)PropList; @@ -96,7 +92,6 @@ reduction(T *, BinaryOperation, const property_list &PropList = {}) { template ext::oneapi::detail::reduction_impl< T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm<1>, accessor>> @@ -111,9 +106,7 @@ reduction(buffer Var, handler &CGH, const T &Identity, /// the given USM pointer \p Var, reduction identity value \p Identity, /// binary operation \p Combiner, and optional reduction properties. template -ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 0, 1, - ext::oneapi::detail::default_reduction_algorithm<1>, T *> +ext::oneapi::detail::reduction_impl reduction(T *Var, const T &Identity, BinaryOperation Combiner, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -126,11 +119,9 @@ reduction(T *Var, const T &Identity, BinaryOperation Combiner, /// the given sycl::span \p Span, reduction operation \p Combiner, and /// optional reduction properties. template -std::enable_if_t::value, - ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 1, Extent, - ext::oneapi::detail::default_reduction_algorithm<1>, T *>> +std::enable_if_t< + Extent != dynamic_extent && has_known_identity::value, + ext::oneapi::detail::reduction_impl> reduction(span Span, BinaryOperation, const property_list &PropList = {}) { bool InitializeToIdentity = @@ -144,11 +135,9 @@ reduction(span Span, BinaryOperation, /// The reduction algorithm may be less efficient for this variant as the /// reduction identity is not known statically and it is not provided by user. template -std::enable_if_t::value, - ext::oneapi::detail::reduction_impl< - T, BinaryOperation, 1, Extent, - ext::oneapi::detail::default_reduction_algorithm<1>, T *>> +std::enable_if_t< + Extent != dynamic_extent && !has_known_identity::value, + ext::oneapi::detail::reduction_impl> reduction(span, BinaryOperation, const property_list &PropList = {}) { // TODO: implement reduction that works even when identity is not known. @@ -162,10 +151,9 @@ reduction(span, BinaryOperation, /// the given sycl::span \p Span, reduction identity value \p Identity, /// reduction operation \p Combiner, and optional reduction properties. template -std::enable_if_t, T *>> +std::enable_if_t< + Extent != dynamic_extent, + ext::oneapi::detail::reduction_impl> reduction(span Span, const T &Identity, BinaryOperation Combiner, const property_list &PropList = {}) { bool InitializeToIdentity = From add7ff24a8a9bde467b91040aa77816a6e83cee9 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 14 Jul 2022 21:56:34 -0700 Subject: [PATCH 13/24] make_reduction for partial types deduction + process buffer/span on the caller's side --- sycl/include/sycl/ext/oneapi/reduction.hpp | 91 ++++++++-------- sycl/include/sycl/reduction.hpp | 114 ++++++++++----------- 2 files changed, 101 insertions(+), 104 deletions(-) diff --git a/sycl/include/sycl/ext/oneapi/reduction.hpp b/sycl/include/sycl/ext/oneapi/reduction.hpp index f926969a1508f..7d72432d77ad3 100644 --- a/sycl/include/sycl/ext/oneapi/reduction.hpp +++ b/sycl/include/sycl/ext/oneapi/reduction.hpp @@ -535,6 +535,18 @@ struct accessor_dim_t< static constexpr int value = AccessorDims; }; +template struct get_red_t; +template struct get_red_t { + using type = T; +}; + +template +struct get_red_t< + accessor> { + using type = T; +}; + template class reduction_impl_algo : public reduction_impl_common { @@ -788,15 +800,14 @@ class reduction_impl /// SYCL-2020. /// Constructs reduction_impl when the identity value is statically known. - template * = nullptr> - reduction_impl(buffer Buffer, handler &CGH, - bool InitializeToIdentity) + reduction_impl(RedOutVar &Acc, handler &CGH, bool InitializeToIdentity) : algo(reducer_type::getIdentity(), BinaryOperation(), - InitializeToIdentity, rw_accessor_type{Buffer}) { + InitializeToIdentity, Acc) { algo::associateWithHandler(CGH); - if (Buffer.size() != 1) + if (Acc.size() != 1) throw sycl::runtime_error(errc::invalid, "Reduction variable must be a scalar.", PI_ERROR_INVALID_VALUE); @@ -819,15 +830,12 @@ class reduction_impl /// and user still passed the identity value. /// SYCL-2020. /// Constructs reduction_impl when the identity value is NOT known statically. - template * = nullptr> - reduction_impl(buffer Buffer, handler &CGH, - const T &Identity, BinaryOperation BOp, - bool InitializeToIdentity) - : algo(chooseIdentity(Identity), BOp, InitializeToIdentity, - rw_accessor_type{Buffer}) { + template * = nullptr> + reduction_impl(RedOutVar &Acc, handler &CGH, const T &Identity, + BinaryOperation BOp, bool InitializeToIdentity) + : algo(chooseIdentity(Identity), BOp, InitializeToIdentity, Acc) { algo::associateWithHandler(CGH); - if (Buffer.size() != 1) + if (Acc.size() != 1) throw sycl::runtime_error(errc::invalid, "Reduction variable must be a scalar.", PI_ERROR_INVALID_VALUE); @@ -861,21 +869,15 @@ class reduction_impl reduction_impl(T *VarPtr, const T &Identity, BinaryOperation BOp, bool InitializeToIdentity = false) : algo(chooseIdentity(Identity), BOp, InitializeToIdentity, VarPtr) {} - - /// Constructs reduction_impl when the identity value is statically known - template * = nullptr> - reduction_impl(span Span, bool InitializeToIdentity = false) - : algo(reducer_type::getIdentity(), BinaryOperation(), - InitializeToIdentity, Span.data()) {} - - template * = nullptr> - reduction_impl(span Span, const T &Identity, BinaryOperation BOp, - bool InitializeToIdentity = false) - : algo(chooseIdentity(Identity), BOp, InitializeToIdentity, Span.data()) { - } }; +template +auto make_reduction(RedOutVar RedVar, RestTy &&... Rest) { + return reduction_impl::type, BinaryOp, Dims, + Extent, RedOutVar>{RedVar, std::forward(Rest)...}; +} + /// A helper to pass undefined (sycl::detail::auto_name) names unmodified. We /// must do that to avoid name collisions. template