diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29e9111dc72..355087444ce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,8 +15,8 @@ env: BUILD_CONCURRENCY: 2 MACOS_BUILD_CONCURRENCY: 3 TEST_TIMEOUT: 360 - WINDOWS_TBB_DOWNLOAD_LINK: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/64957c0f-37bf-4408-909c-37ff52fe5119/w_tbb_oneapi_p_2021.11.0.49526.exe - WINDOWS_ICPX_DOWNLOAD_LINK: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/94e15cb5-4bcc-4fdd-91cf-0f819a54e42e/w_dpcpp-cpp-compiler_p_2024.0.2.28_offline.exe + WINDOWS_TBB_DOWNLOAD_LINK: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/c0b87e5c-1e1f-431f-b26e-dc250032e586/w_tbb_oneapi_p_2021.12.0.500_offline.exe + WINDOWS_ICPX_DOWNLOAD_LINK: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/a1d6c917-05ab-4883-b67b-4bd60abb74e5/w_dpcpp-cpp-compiler_p_2024.1.0.469_offline.exe WINDOWS_ONEAPI_PATH: C:\Program Files (x86)\Intel\oneAPI LINUX_ONEAPI_PATH: /opt/intel/oneapi # TODO: get rid of a deprecated configuration: IntelĀ® C++ Compiler Classic @@ -172,6 +172,12 @@ jobs: build_type: release backend: serial device_type: HOST + - os: ubuntu-20.04 + cxx_compiler: icpx + std: 20 + build_type: release + backend: dpcpp + device_type: HOST steps: - uses: actions/checkout@v3 - name: Set up Intel APT repository diff --git a/CMakeLists.txt b/CMakeLists.txt index 61927a3aca9..a2ddcd42a17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -311,6 +311,14 @@ if (ONEDPL_BACKEND MATCHES "^(tbb|dpcpp|dpcpp_only)$") $<$,$>:-fsycl-link> ${ONEDPL_AOT_OPTIONS} ) + + # if C++20 or newer, include Distributed Ranges (experimental) + if (CMAKE_CXX_STANDARD GREATER_EQUAL 20) + set(ONEDPL_USE_DR TRUE) + message(STATUS "Adding Distributed Ranges to the project") + else() + message(STATUS "C++20 required to use Distributed Ranges in oneDPL") + endif() endif() elseif(ONEDPL_BACKEND MATCHES "^(serial)$") diff --git a/include/oneapi/dpl/distributed-ranges b/include/oneapi/dpl/distributed-ranges index dadf6aa73a3..4d79e5598d5 100644 --- a/include/oneapi/dpl/distributed-ranges +++ b/include/oneapi/dpl/distributed-ranges @@ -13,6 +13,11 @@ #include "oneapi/dpl/internal/common_config.h" #include "oneapi/dpl/pstl/onedpl_config.h" +// #if _ONEDPL_BACKEND_SYCL != 0 && __INTEL_LLVM_COMPILER >= 20230000 +#if defined(ONEDPL_USE_DISTRIBUTED_RANGES) #include "oneapi/dpl/internal/distributed_ranges_impl/shp.hpp" +#else +#error "C++20 required to use Distributed Ranges" +#endif #endif /* _ONEDPL_DISTRIBUTED_RANGES */ diff --git a/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_submitters.h b/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_submitters.h index b6432826ebb..a44592166e0 100644 --- a/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_submitters.h +++ b/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_submitters.h @@ -18,6 +18,7 @@ #include "../../../pstl/hetero/dpcpp/utils_ranges_sycl.h" #include "../../../pstl/hetero/dpcpp/parallel_backend_sycl_utils.h" +#include "../../../pstl/hetero/dpcpp/sycl_traits.h" //SYCL traits specialization for some oneDPL types. #include "esimd_radix_sort_kernels.h" #include "esimd_defs.h" diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index cd080b1e6a1..f053974f7b2 100644 --- a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -30,10 +30,10 @@ namespace dpl namespace __internal { -template = 0> +template auto -__pattern_walk1_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) +__pattern_walk1_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Function __f) { auto __n = __last - __first; assert(__n > 0); @@ -43,19 +43,19 @@ __pattern_walk1_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forw auto __buf = __keep(__first, __last); auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf.all_view()); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf.all_view()); return __future_obj; } template = 0> + typename _BackendTag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2, + typename _Function> auto -__pattern_walk2_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f) +__pattern_walk2_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) { auto __n = __last1 - __first1; assert(__n > 0); @@ -67,8 +67,8 @@ __pattern_walk2_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo auto __buf2 = __keep2(__first2, __first2 + __n); auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view()); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view()); if constexpr (_IsSync::value) __future.wait(); @@ -76,11 +76,11 @@ __pattern_walk2_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo return __future.__make_future(__first2 + __n); } -template = 0> +template auto -__pattern_walk3_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) +__pattern_walk3_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) { auto __n = __last1 - __first1; assert(__n > 0); @@ -95,20 +95,22 @@ __pattern_walk3_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator3>(); auto __buf3 = __keep3(__first3, __first3 + __n); - auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view(), __buf3.all_view()); + auto __future = + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, + __buf1.all_view(), __buf2.all_view(), __buf3.all_view()); return __future.__make_future(__first3 + __n); } -template = 0> +template auto -__pattern_walk2_brick_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick) +__pattern_walk2_brick_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) { return __pattern_walk2_async( + __tag, __par_backend_hetero::make_wrapped_policy<__walk2_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __first1, __last1, __first2, __brick); } @@ -117,11 +119,10 @@ __pattern_walk2_brick_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first // transform_reduce (version with two binary functions) //------------------------------------------------------------------------ -template = 0> +template auto -__pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, +__pattern_transform_reduce_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { @@ -141,7 +142,7 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _RandomAccessIterato return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf1.all_view(), __buf2.all_view()); } @@ -150,12 +151,12 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _RandomAccessIterato // transform_reduce (with unary and binary functions) //------------------------------------------------------------------------ -template = 0> +template auto -__pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) +__pattern_transform_reduce_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) { assert(__first < __last); @@ -168,18 +169,18 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _ForwardIterator __f return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf.all_view()); } -template = 0> +template auto -__pattern_fill_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _T& __value) +__pattern_fill_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, const _T& __value) { return __pattern_walk1_async( - ::std::forward<_ExecutionPolicy>(__exec), + __tag, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), fill_functor<_T>{__value}); @@ -189,13 +190,12 @@ __pattern_fill_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa // transform_scan //------------------------------------------------------------------------ -template = 0> +template auto -__pattern_transform_scan_base_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, - _Iterator2 __result, _UnaryOperation __unary_op, _InitType __init, - _BinaryOperation __binary_op, _Inclusive) +__pattern_transform_scan_base_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, + _InitType __init, _BinaryOperation __binary_op, _Inclusive) { assert(__first < __last); @@ -206,39 +206,39 @@ __pattern_transform_scan_base_async(_ExecutionPolicy&& __exec, _Iterator1 __firs auto __buf2 = __keep2(__result, __result + __n); auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __unary_op, __init, - __binary_op, _Inclusive{}); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __unary_op, + __init, __binary_op, _Inclusive{}); return __res.__make_future(__result + __n); } -template = 0> +template auto -__pattern_transform_scan_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive) +__pattern_transform_scan_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _Type __init, + _BinaryOperation __binary_op, _Inclusive) { using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; - return __pattern_transform_scan_base_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); + return __pattern_transform_scan_base_async(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result, __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); } // scan without initial element -template = 0> +template auto -__pattern_transform_scan_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive) +__pattern_transform_scan_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, + _BinaryOperation __binary_op, _Inclusive) { using _ValueType = typename ::std::iterator_traits<_Iterator1>::value_type; using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_ValueType>; using _InitType = unseq_backend::__no_init_value<_RepackedType>; - return __pattern_transform_scan_base_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - __unary_op, _InitType{}, __binary_op, _Inclusive{}); + return __pattern_transform_scan_base_async(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result, __unary_op, _InitType{}, __binary_op, _Inclusive{}); } } // namespace __internal diff --git a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h index 26eca467131..dfd4a969ec8 100644 --- a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h +++ b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h @@ -43,9 +43,11 @@ auto transform_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryOperation __op, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + wait_for_all(::std::forward<_Events>(__dependencies)...); auto ret_val = oneapi::dpl::__internal::__pattern_walk2_async( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}); return ret_val; } @@ -59,9 +61,11 @@ transform_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI _ForwardIterator2 __first2, _ForwardIterator __result, _BinaryOperation __op, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + wait_for_all(::std::forward<_Events>(__dependencies)...); auto ret_val = oneapi::dpl::__internal::__pattern_walk3_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op))); return ret_val; } @@ -73,10 +77,12 @@ auto copy_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _Events&&... __dependencies) { + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + wait_for_all(::std::forward<_Events>(__dependencies)...); auto ret_val = oneapi::dpl::__internal::__pattern_walk2_brick_async( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__brick_copy{}); return ret_val; } @@ -93,8 +99,11 @@ sort_async(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Comp auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - return __par_backend_hetero::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), - __comp, oneapi::dpl::identity{}); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + + return __par_backend_hetero::__parallel_stable_sort(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf.all_view(), __comp, oneapi::dpl::identity{}); } template (__dependencies)...); - auto ret_val = - oneapi::dpl::__internal::__pattern_walk1_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __f); + auto ret_val = oneapi::dpl::__internal::__pattern_walk1_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __f); return ret_val; } @@ -130,10 +141,12 @@ auto reduce_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + wait_for_all(::std::forward<_Events>(__dependencies)...); - auto ret_val = oneapi::dpl::__internal::__pattern_transform_reduce_async(::std::forward<_ExecutionPolicy>(__exec), - __first, __last, __init, __binary_op, - oneapi::dpl::__internal::__no_op()); + auto ret_val = oneapi::dpl::__internal::__pattern_transform_reduce_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, + oneapi::dpl::__internal::__no_op()); return ret_val; } @@ -165,9 +178,11 @@ auto fill_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + wait_for_all(::std::forward<_Events>(__dependencies)...); - return oneapi::dpl::__internal::__pattern_fill_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __value); + return oneapi::dpl::__internal::__pattern_fill_async(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __value); } // [async.transform_reduce] @@ -180,9 +195,12 @@ auto transform_reduce_async(_ExecutionPolicy&& __exec, _ForwardIt1 __first1, _ForwardIt1 __last1, _ForwardIt2 __first2, _T __init, _BinaryOp1 __binary_op1, _BinaryOp2 __binary_op2, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_reduce_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, __binary_op1, __binary_op2); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, __binary_op1, + __binary_op2); } template (__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_reduce_async(::std::forward<_ExecutionPolicy>(__exec), __first, - __last, __init, __binary_op, __unary_op); + return oneapi::dpl::__internal::__pattern_transform_reduce_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, __unary_op); } template ::value_type; wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - ::std::plus<_ValueType>(), /*inclusive=*/::std::true_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), ::std::plus<_ValueType>(), /*inclusive=*/::std::true_type()); } template (__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __binary_op, /*inclusive=*/::std::true_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __binary_op, /*inclusive=*/::std::true_type()); } template (__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __init, __binary_op, /*inclusive=*/::std::true_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __init, __binary_op, /*inclusive=*/::std::true_type()); } template (__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __init, ::std::plus<_T>(), /*exclusive=*/::std::false_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __init, ::std::plus<_T>(), /*exclusive=*/::std::false_type()); } template (__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __init, __binary_op, /*exclusive=*/::std::false_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __init, __binary_op, /*exclusive=*/::std::false_type()); } template (__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_scan_async(::std::forward<_ExecutionPolicy>(__exec), __first1, + return oneapi::dpl::__internal::__pattern_transform_scan_async(__dispatch_tag, + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __unary_op, __init, __binary_op, /*exclusive=*/::std::false_type()); } @@ -299,10 +332,12 @@ transform_inclusive_scan_async(_ExecutionPolicy&& __exec, _ForwardIt1 __first1, _ForwardIt2 __first2, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + wait_for_all(::std::forward<_Events>(__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_scan_async(::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __first2, __unary_op, __binary_op, - /*inclusive=*/::std::true_type()); + return oneapi::dpl::__internal::__pattern_transform_scan_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __unary_op, __binary_op, + /*inclusive=*/::std::true_type()); } template (__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_scan_async(::std::forward<_ExecutionPolicy>(__exec), __first1, + return oneapi::dpl::__internal::__pattern_transform_scan_async(__dispatch_tag, + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __unary_op, __init, __binary_op, /*inclusive=*/::std::true_type()); } diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 0c689fe6b8e..59c2f74ea82 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -68,36 +68,42 @@ struct custom_brick } }; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -lower_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, +OutputIterator +lower_bound_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { return ::std::lower_bound(start, end, val, comp) - start; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, +OutputIterator +upper_bound_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { return ::std::upper_bound(start, end, val, comp) - start; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, +OutputIterator +binary_search_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { return ::std::binary_search(start, end, val, comp); @@ -105,11 +111,11 @@ binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, In } #if _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -lower_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, - InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) +template +OutputIterator +lower_bound_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, + InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -128,18 +134,18 @@ lower_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, Inpu auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); - __bknd::__parallel_for(::std::forward(policy), + __bknd::__parallel_for(_BackendTag{}, ::std::forward(policy), custom_brick{comp, size}, value_size, zip_vw) .wait(); return result + value_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, - InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) +template +OutputIterator +upper_bound_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, + InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -158,18 +164,18 @@ upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, Inpu auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); - __bknd::__parallel_for(::std::forward(policy), + __bknd::__parallel_for(_BackendTag{}, ::std::forward(policy), custom_brick{comp, size}, value_size, zip_vw) .wait(); return result + value_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, - InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) +template +OutputIterator +binary_search_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, + InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -188,7 +194,7 @@ binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, In auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); - __bknd::__parallel_for(::std::forward(policy), + __bknd::__parallel_for(_BackendTag{}, ::std::forward(policy), custom_brick{comp, size}, value_size, zip_vw) .wait(); @@ -204,8 +210,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy lower_bound(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result) { - return internal::lower_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, - oneapi::dpl::__internal::__pstl_less()); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::lower_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, oneapi::dpl::__internal::__pstl_less()); } template lower_bound(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - return internal::lower_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, comp); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::lower_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, comp); } //Lower Bound end @@ -225,8 +236,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy upper_bound(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result) { - return internal::upper_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, - oneapi::dpl::__internal::__pstl_less()); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::upper_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, oneapi::dpl::__internal::__pstl_less()); } template upper_bound(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - return internal::upper_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, comp); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::upper_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, comp); } //Upper Bound end @@ -247,8 +263,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy binary_search(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result) { - return internal::binary_search_impl(::std::forward(policy), start, end, value_start, value_end, result, - oneapi::dpl::__internal::__pstl_less()); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::binary_search_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, oneapi::dpl::__internal::__pstl_less()); } template binary_search(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - return internal::binary_search_impl(::std::forward(policy), start, end, value_start, value_end, result, - comp); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::binary_search_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, comp); } //Binary search end diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp index 074f7f35501..c2bd549ba1e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp @@ -4,56 +4,57 @@ #pragma once -#include +#include -namespace dr { +namespace experimental::dr +{ template concept remote_iterator = - std::forward_iterator && requires(I &iter) { dr::ranges::rank(iter); }; + std::forward_iterator && requires(I &iter) { experimental::dr::ranges::rank(iter); }; template concept remote_range = - rng::forward_range && requires(R &r) { dr::ranges::rank(r); }; + rng::forward_range && requires(R &r) { experimental::dr::ranges::rank(r); }; template concept distributed_range = - rng::forward_range && requires(R &r) { dr::ranges::segments(r); }; + rng::forward_range && requires(R &r) { experimental::dr::ranges::segments(r); }; template concept remote_contiguous_iterator = std::random_access_iterator && requires(I &iter) { - dr::ranges::rank(iter); - { dr::ranges::local(iter) } -> std::contiguous_iterator; + experimental::dr::ranges::rank(iter); + { experimental::dr::ranges::local(iter) } -> std::contiguous_iterator; }; template concept distributed_iterator = std::forward_iterator && requires(I &iter) { - dr::ranges::segments(iter); + experimental::dr::ranges::segments(iter); }; template concept remote_contiguous_range = remote_range && rng::random_access_range && requires(R &r) { - { dr::ranges::local(r) } -> rng::contiguous_range; + { experimental::dr::ranges::local(r) } -> rng::contiguous_range; }; template concept distributed_contiguous_range = distributed_range && rng::random_access_range && requires(R &r) { - { dr::ranges::segments(r) } -> rng::random_access_range; + { experimental::dr::ranges::segments(r) } -> rng::random_access_range; } && remote_contiguous_range< - rng::range_value_t()))>>; + rng::range_value_t()))>>; template concept distributed_contiguous_iterator = distributed_iterator && rng::random_access_iterator && requires(Iter &iter) { - { dr::ranges::segments(iter) } -> rng::random_access_range; + { experimental::dr::ranges::segments(iter) } -> rng::random_access_range; } && - remote_contiguous_range()))>>; -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/communicator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/communicator.hpp deleted file mode 100644 index 596a298bae8..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/communicator.hpp +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -namespace dr { - -class communicator { -public: - communicator(MPI_Comm comm = MPI_COMM_WORLD) : mpi_comm_(comm) { - int rank, size; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - rank_ = rank; - size_ = size; - } - - auto size() const { return size_; } - auto rank() const { return rank_; } - auto prev() const { return (rank() + size() - 1) % size(); } - auto next() const { return (rank() + 1) % size(); } - auto first() const { return rank() == 0; } - auto last() const { return rank() == size() - 1; } - - MPI_Comm mpi_comm() const { return mpi_comm_; } - - void barrier() const { -#ifdef DRISHMEM - DRLOG("calling COMM barrier (by calling fence) in ISHMEM"); - ishmem_fence(); -#endif - DRLOG("calling COMM barrier in MPI"); - MPI_Barrier(mpi_comm_); - DRLOG("COMM barrier finished"); - } - - void bcast(void *src, std::size_t count, std::size_t root) const { - MPI_Bcast(src, count, MPI_BYTE, root, mpi_comm_); - } - - void scatter(const void *src, void *dst, std::size_t count, - std::size_t root) const { - MPI_Scatter(src, count, MPI_BYTE, dst, count, MPI_BYTE, root, mpi_comm_); - } - - template - void scatter(const std::span src, T &dst, std::size_t root) const { - assert(rng::size(src) >= size_); - scatter(rng::data(src), &dst, sizeof(T), root); - } - - void scatterv(const void *src, int *counts, int *offsets, void *dst, - int dst_count, std::size_t root) const { - assert(counts == nullptr || counts[rank()] == dst_count); - MPI_Scatterv(src, counts, offsets, MPI_BYTE, dst, dst_count, MPI_BYTE, root, - mpi_comm_); - } - - void gather(const void *src, void *dst, std::size_t count, - std::size_t root) const { - MPI_Gather_c(src, count, MPI_BYTE, dst, count, MPI_BYTE, root, mpi_comm_); - } - - template - void gather(const T &src, std::span dst, std::size_t root) const { - assert(rng::size(dst) >= size_); - gather(&src, rng::data(dst), sizeof(T), root); - } - - template - void all_gather(const T *src, T *dst, std::size_t count) const { - // Gather size elements from each rank - MPI_Allgather_c(src, count * sizeof(T), MPI_BYTE, dst, count * sizeof(T), - MPI_BYTE, mpi_comm_); - } - - template - void all_gather(const T &src, std::vector &dst) const { - assert(rng::size(dst) >= size_); - all_gather(&src, rng::data(dst), 1); - } - - template - void all_gather(const R &src, R &dst) const { - assert(rng::size(dst) >= size_ * rng::size(src)); - all_gather(rng::data(src), rng::data(dst), rng::size(src)); - } - - template - void i_all_gather(const T *src, T *dst, std::size_t count, - MPI_Request *req) const { - // Gather size elements from each rank - MPI_Iallgather_c(src, count * sizeof(T), MPI_BYTE, dst, count * sizeof(T), - MPI_BYTE, mpi_comm_, req); - } - - template - void i_all_gather(const T &src, std::vector &dst, MPI_Request *req) const { - assert(rng::size(dst) >= size_); - i_all_gather(&src, rng::data(dst), 1, req); - } - - void gatherv(const void *src, int *counts, int *offsets, void *dst, - std::size_t root) const { - MPI_Gatherv(src, counts[rank()], MPI_BYTE, dst, counts, offsets, MPI_BYTE, - root, mpi_comm_); - } - - // pointer with explicit tag - template - void isend(const T *data, std::size_t count, std::size_t dst_rank, auto tag, - MPI_Request *request) const { - MPI_Isend_c(data, count * sizeof(T), MPI_BYTE, dst_rank, int(tag), - mpi_comm_, request); - } - - // pointer, no tag - template - void isend(const T *data, std::size_t count, std::size_t dst_rank, - MPI_Request *request) const { - isend(data, count, dst_rank, 0, request); - } - - // range and tag - template - void isend(const R &data, std::size_t dst_rank, auto tag, - MPI_Request *request) const { - isend(rng::data(data), rng::size(data), dst_rank, tag, request); - } - - // range, no tag - template - void isend(const R &data, std::size_t dst_rank, MPI_Request *request) const { - isend(data, dst_rank, 0, request); - } - - // pointer and tag - template - void irecv(T *data, std::size_t size, std::size_t src_rank, auto tag, - MPI_Request *request) const { - MPI_Irecv_c(data, size * sizeof(T), MPI_BYTE, src_rank, int(tag), mpi_comm_, - request); - } - - // pointer, no tag - template - void irecv(T *data, std::size_t size, std::size_t src_rank, - MPI_Request *request) const { - irecv(data, size, src_rank, 0, request); - } - - // range and tag - template - void irecv(R &data, std::size_t src_rank, int tag, - MPI_Request *request) const { - irecv(rng::data(data), rng::size(data), src_rank, tag, request); - } - - // range, no tag - template - void irecv(R &data, std::size_t src_rank, MPI_Request *request) const { - irecv(data, src_rank, 0, request); - } - - template - void alltoall(const R &sendr, R &recvr, std::size_t count) { - alltoall(rng::data(sendr), rng::data(recvr), count); - } - - template - void alltoall(const T *send, T *receive, std::size_t count) { - std::size_t bytes = count * sizeof(T); - - timer time; - MPI_Alltoall_c(send, bytes, MPI_BYTE, receive, bytes, MPI_BYTE, mpi_comm_); - dr::drlog.debug(dr::logger::mpi, "alltoall bytes: {} elapsed: {}\n", bytes, - time.elapsed()); - } - - template - void alltoallv(const SendR &sendbuf, const std::vector &sendcnt, - const std::vector &senddsp, RecvR &recvbuf, - const std::vector &recvcnt, - const std::vector &recvdsp) { - using valT = typename RecvR::value_type; - - static_assert(std::is_same_v, - std::ranges::range_value_t>); - - assert(rng::size(sendcnt) == size_); - assert(rng::size(senddsp) == size_); - assert(rng::size(recvcnt) == size_); - assert(rng::size(recvdsp) == size_); - - std::vector _sendcnt(size_); - std::vector _senddsp(size_); - std::vector _recvcnt(size_); - std::vector _recvdsp(size_); - - rng::transform(sendcnt, _sendcnt.begin(), - [](auto e) { return e * sizeof(valT); }); - rng::transform(senddsp, _senddsp.begin(), - [](auto e) { return e * sizeof(valT); }); - rng::transform(recvcnt, _recvcnt.begin(), - [](auto e) { return e * sizeof(valT); }); - rng::transform(recvdsp, _recvdsp.begin(), - [](auto e) { return e * sizeof(valT); }); - - MPI_Alltoallv(rng::data(sendbuf), rng::data(_sendcnt), rng::data(_senddsp), - MPI_BYTE, rng::data(recvbuf), rng::data(_recvcnt), - rng::data(_recvdsp), MPI_BYTE, mpi_comm_); - } - - bool operator==(const communicator &other) const { - return mpi_comm_ == other.mpi_comm_; - } - -private: - MPI_Comm mpi_comm_; - std::size_t rank_; - std::size_t size_; -}; - -class rma_window { -public: - void create(communicator comm, void *data, std::size_t size) { - local_data_ = data; - communicator_ = comm; - DRLOG("win create:: size: {} data:{}", size, data); - MPI_Win_create(data, size, 1, MPI_INFO_NULL, comm.mpi_comm(), &win_); - } - - template auto local_data() { - return static_cast(local_data_); - } - - void free() { MPI_Win_free(&win_); } - - bool operator==(const rma_window other) const noexcept { - return this->win_ == other.win_; - } - - void set_null() { win_ = MPI_WIN_NULL; } - bool null() const noexcept { return win_ == MPI_WIN_NULL; } - - template T get(std::size_t rank, std::size_t disp) const { - T dst; - get(&dst, sizeof(T), rank, disp * sizeof(T)); - return dst; - } - - void get(void *dst, std::size_t size, std::size_t rank, - std::size_t disp) const { - DRLOG("MPI comm get:: ({}:{}:{})", rank, disp, size); - MPI_Request request; - MPI_Rget(dst, size, MPI_BYTE, rank, disp, size, MPI_BYTE, win_, &request); - MPI_Wait(&request, MPI_STATUS_IGNORE); - } - - void put(const auto &src, std::size_t rank, std::size_t disp) const { - put(&src, sizeof(src), rank, disp * sizeof(src)); - } - - void put(const void *src, std::size_t size, std::size_t rank, - std::size_t disp) const { - DRLOG("MPI comm put:: ({}:{}:{})", rank, disp, size); - MPI_Request request; - MPI_Rput(src, size, MPI_BYTE, rank, disp, size, MPI_BYTE, win_, &request); - DRLOG("MPI comm wait:: ({}:{}:{})", rank, disp, size); - MPI_Wait(&request, MPI_STATUS_IGNORE); - DRLOG("MPI comm wait finished:: ({}:{}:{})", rank, disp, size); - } - - void fence() const { - if (win_ != MPI_WIN_NULL) { - DRLOG("MPI comm fence:: win:{}", win_); - MPI_Win_fence(0, win_); - DRLOG("MPI comm fence finished:: win:{}", win_); - } else { - DRLOG("MPI comm fence skipped because win is NULL"); - } - } - - void flush(std::size_t rank) const { - DRLOG("MPI comm flush:: rank:{} win:{}", rank, win_); - MPI_Win_flush(rank, win_); - DRLOG("MPI comm flush finished:: rank:{} win:{}", rank, win_); - } - - const auto &communicator() const { return communicator_; } - auto mpi_win() { return win_; } - -private: - dr::communicator communicator_; - MPI_Win win_ = MPI_WIN_NULL; - void *local_data_ = nullptr; -}; - -} // namespace dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp index 435282469ca..a9fd556a1c5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp @@ -4,9 +4,9 @@ #pragma once -#include +#include -namespace dr { +namespace experimental::dr { namespace __detail { @@ -59,4 +59,4 @@ inline constexpr auto enumerate = enumerate_fn_{}; } // namespace __detail -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/format_shim.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/format_shim.hpp deleted file mode 100644 index 9eef4c8bb49..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/format_shim.hpp +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#ifdef DR_FORMAT -#include -#include -#endif - -// Workaround for doxygen warning about internal inconsistency -namespace fmt {} diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp index a317e97bcf9..a9d072c35c2 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr { +namespace experimental::dr { namespace { template @@ -96,19 +96,19 @@ template class index { index_type second; }; -} // namespace dr +} // namespace experimental::dr namespace std { template -struct tuple_element> +struct tuple_element> : tuple_element> {}; template -struct tuple_size> : integral_constant {}; +struct tuple_size> : integral_constant {}; template -inline constexpr I get(dr::index index) +inline constexpr I get(experimental::dr::index index) requires(Index <= 1) { if constexpr (Index == 0) { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp index 9b287e6602d..5cdc1eb36a5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp @@ -7,9 +7,9 @@ #include #include -#include +#include -namespace dr { +namespace experimental::dr { namespace { @@ -197,4 +197,4 @@ template class iterator_adaptor { accessor_type accessor_; }; -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp deleted file mode 100644 index 50642205c57..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -#include "../source_location/source_location.hpp" - -#include "format_shim.hpp" -#include "ranges_shim.hpp" - -namespace dr { - -class timer { -public: - timer() : begin_(std::chrono::high_resolution_clock::now()) {} - - auto elapsed() { - auto end = std::chrono::high_resolution_clock::now(); - return std::chrono::duration(end - begin_).count(); - } - -private: - std::chrono::time_point begin_; -}; - -class logger { -public: - enum filters { base, for_each, transpose, mdspan_view, mpi, last }; - - logger() { rng::fill(enabled_, true); } - - void set_file(std::ofstream &fout) { fout_ = &fout; } - - void filter(const std::vector &names) { - if (names.size() == 0) { - return; - } - - // Disable everything - rng::fill(enabled_, false); - - // Enabled selected filters - for (const auto &name : names) { - std::size_t index = filters::last; - for (std::size_t i = 0; i < filter_names_.size(); i++) { - if (name == filter_names_[i]) { - index = i; - } - } - if (index == filters::last) { - std::cerr << "Ignoring unrecognized filter: " << name << "\n"; - } else { - enabled_[index] = true; - } - } - } - -#ifdef DR_FORMAT - - template - void debug(const nostd::source_location &location, - fmt::format_string format, Args &&...args) { - if (fout_ && enabled_[filters::base]) { - *fout_ << fmt::format(format, std::forward(args)...) << " <" - << location.file_name() << ":" << location.line() << ">\n"; - fout_->flush(); - } - } - - template - void debug(fmt::format_string format, Args &&...args) { - debug(filters::base, format, std::forward(args)...); - } - - template - void debug(filters filter, fmt::format_string format, - Args &&...args) { - if (fout_ && enabled_[filter]) { - *fout_ << fmt::format(format, std::forward(args)...); - fout_->flush(); - } - } - -#else - - template - void debug(const nostd::source_location &location, std::string format, - Args &&...args) {} - - template void debug(std::string format, Args &&...args) {} - - template - void debug(filters filter, std::string format, Args &&...args) {} - -#endif - -private: - std::ofstream *fout_ = nullptr; - std::array enabled_; - std::array filter_names_ = { - "base", "for_each", "transpose", "mdspan_view", "mpi"}; -}; - -inline logger drlog; - -#define DRLOG(...) \ - dr::drlog.debug(nostd::source_location::current(), __VA_ARGS__) - -} // namespace dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdarray_shim.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdarray_shim.hpp deleted file mode 100644 index eff112d0d79..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdarray_shim.hpp +++ /dev/null @@ -1,7 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_shim.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_shim.hpp deleted file mode 100644 index 3b0230e9b75..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_shim.hpp +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#define MDSPAN_NAMESPACE std::experimental -namespace md = MDSPAN_NAMESPACE; diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_utils.hpp deleted file mode 100644 index d23c92a9b7b..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_utils.hpp +++ /dev/null @@ -1,335 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -namespace dr::__detail { - -template auto dims(md::dextents extents) { - if constexpr (Rank == 1) { - return std::tuple(extents.extent(0)); - } else if constexpr (Rank == 2) { - return std::tuple(extents.extent(0), extents.extent(1)); - } else if constexpr (Rank == 3) { - return std::tuple(extents.extent(0), extents.extent(1), extents.extent(2)); - } else { - assert(false); - } -} - -template auto shape_to_strides(const Index &shape) { - const std::size_t rank = rng::size(shape); - Index strides; - strides[rank - 1] = 1; - for (std::size_t i = 1; i < rank; i++) { - strides[rank - i - 1] = strides[rank - i] * shape[rank - i]; - } - return strides; -} - -template -auto linear_to_index(std::size_t linear, const Index &shape) { - Index index, strides(shape_to_strides(shape)); - - for (std::size_t i = 0; i < rng::size(shape); i++) { - index[i] = linear / strides[i]; - linear = linear % strides[i]; - } - - return index; -} - -template -concept mdspan_like = requires(Mdspan &mdspan) { - mdspan.rank(); - mdspan.extents(); -}; - -template -concept mdarray_like = requires(Mdarray &mdarray) { mdarray.to_mdspan(); }; - -template using dr_extents = std::array; -template using md_extents = md::dextents; - -// -// Mdspan accessor using an iterator -// -template class mdspan_iter_accessor { -public: - using data_handle_type = Iter; - using reference = std::iter_reference_t; - using offset_policy = mdspan_iter_accessor; - - constexpr mdspan_iter_accessor() noexcept = default; - constexpr auto access(Iter iter, std::size_t index) const { - return iter[index]; - } - - constexpr auto offset(Iter iter, std::size_t index) const noexcept { - return iter + index; - } -}; - -template -auto make_submdspan_impl(M mdspan, const dr_extents &starts, - const dr_extents &ends, - std::index_sequence) { - return md::submdspan(mdspan, std::tuple(starts[indexes], ends[indexes])...); -} - -// Mdspan accepts slices, but that is hard to work with because it -// requires parameter packs. Work with starts/size vectors internally -// and use slices at the interface -template -auto make_submdspan(auto mdspan, const std::array &starts, - const std::array &ends) { - return make_submdspan_impl(mdspan, starts, ends, - std::make_index_sequence{}); -} - -template -void mdspan_foreach(md_extents extents, Op op, - dr_extents index = dr_extents(), - std::size_t rank = 0) { - for (index[rank] = 0; index[rank] < extents.extent(rank); index[rank]++) { - if (rank == Rank - 1) { - op(index); - } else { - mdspan_foreach(extents, op, index, rank + 1); - } - } -} - -// Pack mdspan into contiguous container -template -auto mdspan_copy(Src src, std::forward_iterator auto dst) { - __detail::event event; - - constexpr std::size_t rank = std::remove_cvref_t::rank(); - if (rank >= 2 && rank <= 3 && mhp::use_sycl()) { -#ifdef SYCL_LANGUAGE_VERSION - constexpr std::size_t rank = std::remove_cvref_t::rank(); - if constexpr (rank == 2) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), sycl::range(src.extent(0), src.extent(1)), - [src, dst](auto idx) { - dst[idx[0] * src.extent(1) + idx[1]] = src(idx); - }); - } else if constexpr (rank == 3) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), - sycl::range(src.extent(0), src.extent(1), src.extent(2)), - [src, dst](auto idx) { - dst[idx[0] * src.extent(1) * src.extent(2) + - idx[1] * src.extent(2) + idx[2]] = src(idx); - }); - } else { - assert(false); - } -#endif - } else { - auto pack = [src, &dst](auto index) { *dst++ = src(index); }; - mdspan_foreach(src.extents(), pack); - } - - return event; -} - -// unpack contiguous container into mdspan -template -auto mdspan_copy(std::forward_iterator auto src, Dst dst) { - __detail::event event; - - constexpr std::size_t rank = std::remove_cvref_t::rank(); - if (rank >= 2 && rank <= 3 && mhp::use_sycl()) { -#ifdef SYCL_LANGUAGE_VERSION - if constexpr (rank == 2) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), sycl::range(dst.extent(0), dst.extent(1)), - [src, dst](auto idx) { - dst(idx) = src[idx[0] * dst.extent(1) + idx[1]]; - }); - } else if constexpr (rank == 3) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), - sycl::range(dst.extent(0), dst.extent(1), dst.extent(2)), - [src, dst](auto idx) { - dst(idx) = src[idx[0] * dst.extent(1) * dst.extent(2) + - idx[1] * dst.extent(2) + idx[2]]; - }); - } else { - assert(false); - } -#endif - } else { - auto unpack = [&src, dst](auto index) { dst(index) = *src++; }; - mdspan_foreach(dst.extents(), unpack); - } - - return event; -} - -// copy mdspan to mdspan -auto mdspan_copy(mdspan_like auto src, mdspan_like auto dst) { - __detail::event event; - - assert(src.extents() == dst.extents()); - - constexpr std::size_t rank = std::remove_cvref_t::rank(); - if (rank >= 2 && rank <= 3 && mhp::use_sycl()) { -#ifdef SYCL_LANGUAGE_VERSION - dr::drlog.debug("mdspan_copy using sycl\n"); - if constexpr (rank == 2) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), sycl::range(dst.extent(0), dst.extent(1)), - [src, dst](auto idx) { dst(idx) = src(idx); }); - } else if constexpr (rank == 3) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), - sycl::range(dst.extent(0), dst.extent(1), dst.extent(2)), - [src, dst](auto idx) { dst(idx) = src(idx); }); - } else { - assert(false); - } -#endif - } else { - - auto copy = [src, dst](auto index) { dst(index) = src(index); }; - mdspan_foreach(src.extents(), copy); - } - - return event; -} - -// For operator(), rearrange indices according to template arguments. -// -// For mdtranspose a(b); -// -// a(1, 2, 3) references b(3, 1, 2) -// -template -class mdtranspose : public Mdspan { -private: - static constexpr std::size_t rank_ = Mdspan::rank(); - -public: - // Inherit constructors from base class - mdtranspose(Mdspan &mdspan) : Mdspan(mdspan) {} - - // rearrange indices according to template arguments - template - auto &operator()(Indexes... indexes) const { - std::tuple index(indexes...); - return Mdspan::operator()(std::get(index)...); - } - auto &operator()(std::array index) const { - return Mdspan::operator()(index[Is]...); - } - - auto extents() const { - // To get the extents, we must invert the index mapping - std::array from_transposed({Is...}); - std::array extents_t; - for (std::size_t i = 0; i < rank_; i++) { - extents_t[from_transposed[i]] = Mdspan::extent(i); - } - - return md_extents(extents_t); - } - auto extent(std::size_t d) const { return extents().extent(d); } -}; - -} // namespace dr::__detail - -template -struct fmt::formatter : public formatter { - template - auto format(Mdspan mdspan, FmtContext &ctx) const { - std::array index; - rng::fill(index, 0); - format_mdspan(ctx, mdspan, index, 0); - return ctx.out(); - } - - void format_mdspan(auto &ctx, auto mdspan, auto &index, - std::size_t dim) const { - for (std::size_t i = 0; i < mdspan.extent(dim); i++) { - index[dim] = i; - if (dim == mdspan.rank() - 1) { - if (i == 0) { - format_to(ctx.out(), "{}: ", index); - } - format_to(ctx.out(), "{:4} ", mdspan(index)); - } else { - format_mdspan(ctx, mdspan, index, dim + 1); - } - } - format_to(ctx.out(), "\n"); - } -}; - -namespace MDSPAN_NAMESPACE { - -template -bool operator==(const M1 &m1, const M2 &m2) { - constexpr std::size_t rank1 = M1::rank(), rank2 = M2::rank(); - static_assert(rank1 == rank2); - if (dr::__detail::dims(m1.extents()) != - dr::__detail::dims(m2.extents())) { - return false; - } - - // See mdspan_foreach for a way to generalize this to all ranks - if constexpr (M1::rank() == 1) { - for (std::size_t i = 0; i < m1.extent(0); i++) { - if (m1(i) != m2(i)) { - return false; - } - } - } else if constexpr (M1::rank() == 2) { - for (std::size_t i = 0; i < m1.extent(0); i++) { - for (std::size_t j = 0; j < m1.extent(1); j++) { - if (m1(i, j) != m2(i, j)) { - return false; - } - } - } - } else if constexpr (M1::rank() == 3) { - for (std::size_t i = 0; i < m1.extent(0); i++) { - for (std::size_t j = 0; j < m1.extent(1); j++) { - for (std::size_t k = 0; k < m1.extent(2); k++) { - if (m1(i, j, k) != m2(i, j, k)) { - return false; - } - } - } - } - } else { - assert(false); - } - - return true; -} - -template -inline std::ostream &operator<<(std::ostream &os, const M &m) { - if constexpr (dr::__detail::mdarray_like) { - os << fmt::format("\n{}", m.to_mdspan()); - } else { - os << fmt::format("\n{}", m); - } - return os; -} - -} // namespace MDSPAN_NAMESPACE - -namespace dr { - -template -concept distributed_mdspan_range = - distributed_range && requires(R &r) { r.mdspan(); }; - -} // namespace dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/memory.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/memory.hpp deleted file mode 100644 index 610d0eb4e9b..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/memory.hpp +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -namespace dr { - -template struct default_memory { - using value_type = T; - std::allocator std_allocator; - - T *allocate(std::size_t size) { - auto p = std_allocator.allocate(size); - assert(p != nullptr); - memset(p, 0, sizeof(T) * size); - return p; - } - - template F *allocate(std::size_t size) { - std::allocator allocator; - auto p = allocator.allocate(size); - assert(p != nullptr); - memset(p, 0, sizeof(F) * size); - return p; - } - - constexpr void deallocate(T *p, std::size_t n) { - std_allocator.deallocate(p, n); - } - - template void deallocate(F *p, std::size_t n) { - std::allocator allocator; - allocator.deallocate(p, n); - p = nullptr; - } - - void memcpy(void *dst, const void *src, std::size_t numBytes) { - std::memcpy(dst, src, numBytes); - } - - template void offload(F lambda) { lambda(); } -}; - -#ifdef SYCL_LANGUAGE_VERSION -template struct sycl_memory { - using value_type = T; - using device_type = sycl::device; - - sycl::device device_; - sycl::context context_; - sycl::usm::alloc kind_; - std::size_t alignment_; - sycl::queue offload_queue_; - - sycl_memory(sycl::queue queue, - sycl::usm::alloc kind = sycl::usm::alloc::shared, - std::size_t alignment = 1) - : device_(queue.get_device()), context_(queue.get_context()), kind_(kind), - alignment_(alignment), offload_queue_(queue) {} - - T *allocate(std::size_t n) { - auto p = sycl::aligned_alloc(alignment_, n, device_, context_, kind_); - assert(p != nullptr); - return p; - } - - template F *allocate(std::size_t n) { - auto p = sycl::aligned_alloc(alignment_, n, device_, context_, kind_); - assert(p != nullptr); - return p; - } - - void deallocate(T *p, std::size_t n) { - assert(p != nullptr); - sycl::free(p, context_); - p = nullptr; - } - - template void deallocate(F *p, std::size_t n) { - assert(p != nullptr); - sycl::free(p, context_); - p = nullptr; - } - - void memcpy(void *dst, const void *src, std::size_t numBytes) { - assert(dst != nullptr); - assert(src != nullptr); - offload_queue_.memcpy(dst, src, numBytes).wait(); - } - - template void offload(F lambda) { - if (kind_ == sycl::usm::alloc::device) { - offload_queue_.single_task(lambda).wait(); - } else { - lambda(); - } - } -}; -#endif - -} // namespace dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/normal_distributed_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/normal_distributed_iterator.hpp deleted file mode 100644 index 83dc1440ada..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/normal_distributed_iterator.hpp +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause -#pragma once - -#include - -namespace dr { - -template -/* -requires(dr::remote_range> && - rng::random_access_range>) - */ -class normal_distributed_iterator_accessor { -public: - using value_type = rng::range_value_t>; - - using segment_type = rng::range_value_t; - - using size_type = rng::range_size_t; - using difference_type = rng::range_difference_t; - - using reference = rng::range_reference_t; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = normal_distributed_iterator_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - constexpr normal_distributed_iterator_accessor() noexcept = default; - constexpr ~normal_distributed_iterator_accessor() noexcept = default; - constexpr normal_distributed_iterator_accessor( - const normal_distributed_iterator_accessor &) noexcept = default; - constexpr normal_distributed_iterator_accessor & - operator=(const normal_distributed_iterator_accessor &) noexcept = default; - - constexpr normal_distributed_iterator_accessor(V segments, - size_type segment_id, - size_type idx) noexcept - : segments_(segments), segment_id_(segment_id), idx_(idx) {} - - constexpr normal_distributed_iterator_accessor & - operator+=(difference_type offset) noexcept { - - while (offset > 0) { - difference_type current_offset = - std::min(offset, difference_type(segments_[segment_id_].size()) - - difference_type(idx_)); - idx_ += current_offset; - offset -= current_offset; - - if (idx_ >= segments_[segment_id_].size()) { - segment_id_++; - idx_ = 0; - } - } - - while (offset < 0) { - difference_type current_offset = - std::min(-offset, difference_type(idx_) + 1); - - difference_type new_idx = difference_type(idx_) - current_offset; - offset += current_offset; - - if (new_idx < 0) { - segment_id_--; - new_idx = segments_[segment_id_].size() - 1; - } - - idx_ = new_idx; - } - - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return segment_id_ == other.segment_id_ && idx_ == other.idx_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(get_global_idx()) - other.get_global_idx(); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - if (segment_id_ < other.segment_id_) { - return true; - } else if (segment_id_ == other.segment_id_) { - return idx_ < other.idx_; - } else { - return false; - } - } - - constexpr reference operator*() const noexcept { - return segments_[segment_id_][idx_]; - } - - auto segments() const noexcept { - return dr::__detail::drop_segments(segments_, segment_id_, idx_); - } - -private: - size_type get_global_idx() const noexcept { - size_type cumulative_size = 0; - for (std::size_t i = 0; i < segment_id_; i++) { - cumulative_size += segments_[i].size(); - } - return cumulative_size + idx_; - } - - rng::views::all_t segments_; - size_type segment_id_ = 0; - size_type idx_ = 0; -}; - -template -using normal_distributed_iterator = - dr::iterator_adaptor>; - -} // namespace dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp index 19c00b5dfd6..ec5892ecb7e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp @@ -6,9 +6,9 @@ #include -#include +#include -namespace dr { +namespace experimental::dr { namespace __detail { @@ -108,4 +108,4 @@ template class direct_iterator { } // namespace __detail -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp index 223433aa936..43074a1f966 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp @@ -4,9 +4,9 @@ #pragma once -#include +#include -namespace dr { +namespace experimental::dr { namespace __detail { @@ -66,4 +66,4 @@ class owning_view : public rng::view_interface> { } // namespace __detail -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp index 0cc96bf9685..4e6060c454c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp @@ -8,9 +8,9 @@ #include #include -#include +#include -namespace dr { +namespace experimental::dr { namespace ranges { @@ -80,7 +80,7 @@ namespace { template concept remote_range_shadow_impl_ = - rng::forward_range && requires(R &r) { dr::ranges::rank(r); }; + rng::forward_range && requires(R &r) { experimental::dr::ranges::rank(r); }; template concept segments_range = @@ -240,14 +240,14 @@ namespace __detail { template concept has_local = requires(T &t) { - { dr::ranges::local(t) } -> std::convertible_to; + { experimental::dr::ranges::local(t) } -> std::convertible_to; }; struct local_fn_ { template requires(has_local) auto operator()(T &&t) const { - return dr::ranges::local(t); + return experimental::dr::ranges::local(t); } template decltype(auto) operator()(T &&t) const { return t; } @@ -259,4 +259,4 @@ inline constexpr auto local = local_fn_{}; } // namespace ranges -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges_utils.hpp deleted file mode 100644 index 1d4f9351e0a..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges_utils.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -namespace dr::__detail { - -// -// std::ranges::enumerate handles unbounded ranges and returns a range -// where end() is a different type than begin(). Most of our code -// assumes std::ranges::common_range. bounded_enumerate requires a -// bounded range and returns a common_range. -// -template auto bounded_enumerate(R &&r) { - auto size = rng::distance(r); - using W = std::uint32_t; - return rng::views::zip(rng::views::iota(W(0), W(size)), r); -} - -} // namespace dr::__detail diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp index 964ffe111e2..7b268ef720b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp @@ -6,10 +6,10 @@ #include -#include -#include +#include +#include -namespace dr { +namespace experimental::dr { template class remote_subrange : public rng::subrange { @@ -27,9 +27,9 @@ class remote_subrange : public rng::subrange { constexpr remote_subrange(R &&r, std::size_t rank) : base(rng::begin(r), rng::end(r)), rank_(rank) {} - template + template constexpr remote_subrange(R &&r) - : base(rng::begin(r), rng::end(r)), rank_(dr::ranges::rank(r)) {} + : base(rng::begin(r), rng::end(r)), rank_(experimental::dr::ranges::rank(r)) {} constexpr std::size_t rank() const noexcept { return rank_; } @@ -40,15 +40,15 @@ class remote_subrange : public rng::subrange { template remote_subrange(R &&, std::size_t) -> remote_subrange>; -template +template remote_subrange(R &&) -> remote_subrange>; -} // namespace dr +} // namespace experimental::dr #if !defined(DR_SPEC) // Needed to satisfy concepts for rng::begin template -inline constexpr bool rng::enable_borrowed_range> = true; +inline constexpr bool rng::enable_borrowed_range> = true; #endif diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp index 05bfb989786..73c3bfdb2f4 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp @@ -4,13 +4,13 @@ #pragma once -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include -namespace dr { +namespace experimental::dr { namespace __detail { @@ -26,9 +26,9 @@ auto take_segments(R &&segments, std::size_t last_seg, std::size_t local_id) { auto first = rng::begin(segment); auto last = rng::begin(segment); rng::advance(last, remainder); - return dr::remote_subrange(first, last, dr::ranges::rank(segment)); + return experimental::dr::remote_subrange(first, last, experimental::dr::ranges::rank(segment)); } else { - return dr::remote_subrange(segment); + return experimental::dr::remote_subrange(segment); } }; @@ -63,9 +63,9 @@ auto drop_segments(R &&segments, std::size_t first_seg, std::size_t local_id) { auto first = rng::begin(segment); rng::advance(first, remainder); auto last = rng::end(segment); - return dr::remote_subrange(first, last, dr::ranges::rank(segment)); + return experimental::dr::remote_subrange(first, last, experimental::dr::ranges::rank(segment)); } else { - return dr::remote_subrange(segment); + return experimental::dr::remote_subrange(segment); } }; @@ -91,49 +91,49 @@ template auto drop_segments(R &&segments, std::size_t n) { } // namespace __detail -} // namespace dr +} // namespace experimental::dr namespace DR_RANGES_NAMESPACE { // A standard library range adaptor does not change the rank of a // remote range, so we can simply return the rank of the base view. template - requires(dr::remote_range().base())>) + requires(experimental::dr::remote_range().base())>) auto rank_(V &&v) { - return dr::ranges::rank(std::forward(v).base()); + return experimental::dr::ranges::rank(std::forward(v).base()); } template - requires(dr::is_ref_view_v> && - dr::distributed_range().base())>) + requires(experimental::dr::is_ref_view_v> && + experimental::dr::distributed_range().base())>) auto segments_(V &&v) { - return dr::ranges::segments(v.base()); + return experimental::dr::ranges::segments(v.base()); } template - requires(dr::is_take_view_v> && - dr::distributed_range().base())>) + requires(experimental::dr::is_take_view_v> && + experimental::dr::distributed_range().base())>) auto segments_(V &&v) { - return dr::__detail::take_segments(dr::ranges::segments(v.base()), v.size()); + return experimental::dr::__detail::take_segments(experimental::dr::ranges::segments(v.base()), v.size()); } template - requires(dr::is_drop_view_v> && - dr::distributed_range().base())>) + requires(experimental::dr::is_drop_view_v> && + experimental::dr::distributed_range().base())>) auto segments_(V &&v) { - return dr::__detail::drop_segments(dr::ranges::segments(v.base()), + return experimental::dr::__detail::drop_segments(experimental::dr::ranges::segments(v.base()), v.base().size() - v.size()); } template - requires(dr::is_subrange_view_v> && - dr::distributed_iterator>) + requires(experimental::dr::is_subrange_view_v> && + experimental::dr::distributed_iterator>) auto segments_(V &&v) { auto first = rng::begin(v); auto last = rng::end(v); auto size = rng::distance(first, last); - return dr::__detail::take_segments(dr::ranges::segments(first), size); + return experimental::dr::__detail::take_segments(experimental::dr::ranges::segments(first), size); } } // namespace DR_RANGES_NAMESPACE diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp index c7cdcfbd9c0..177c785b09e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp @@ -6,13 +6,13 @@ #include -#include +#include #ifdef SYCL_LANGUAGE_VERSION #include -namespace dr::__detail { +namespace experimental::dr::__detail { // With the ND-range workaround, the maximum kernel size is // `std::numeric_limits::max()` rounded down to @@ -191,17 +191,17 @@ sycl::event parallel_for(sycl::queue &q, sycl::range<3> global, Fn &&fn) { using event = sycl::event; -} // namespace dr::__detail +} // namespace experimental::dr::__detail #else -namespace dr::__detail { +namespace experimental::dr::__detail { class event { public: void wait() {} }; -} // namespace dr::__detail +} // namespace experimental::dr::__detail #endif // SYCL_LANGUAGE_VERSION diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/tuple_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/tuple_utils.hpp deleted file mode 100644 index 194873fab05..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/tuple_utils.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -namespace dr::__detail { - -auto tuple_transform(auto tuple, auto op) { - auto transform = [op](auto &&...items) { - return std::make_tuple(op(items)...); - }; - return std::apply(transform, tuple); -} - -auto tie_transform(auto tuple, auto op) { - auto transform = [op](Items &&...items) { - return std::tie(op(std::forward(items))...); - }; - return std::apply(transform, tuple); -} - -auto tuple_foreach(auto tuple, auto op) { - auto transform = [op](auto... items) { (op(items), ...); }; - std::apply(transform, tuple); -} - -} // namespace dr::__detail diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp index b972153c11d..357bdfd8665 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp @@ -4,7 +4,7 @@ #pragma once -namespace dr::__detail { +namespace experimental::dr::__detail { inline std::size_t round_up(std::size_t n, std::size_t multiple) { if (multiple == 0) { @@ -27,4 +27,4 @@ inline std::size_t partition_up(std::size_t n, std::size_t multiple) { return round_up(n, multiple) / multiple; } -} // namespace dr::__detail +} // namespace experimental::dr::__detail diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp index edd50c285cc..4b39ab5adda 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp @@ -6,7 +6,7 @@ #include -namespace dr { +namespace experimental::dr { template struct is_ref_view : std::false_type {}; template @@ -56,4 +56,4 @@ struct is_zip_view> : std::true_type {}; template inline constexpr bool is_zip_view_v = is_zip_view::value; -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp index 4874f553c86..38795eae3d6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp @@ -4,10 +4,8 @@ #pragma once -#include "detail/logger.hpp" +// #include "detail/logger.hpp" #include "shp/algorithms/algorithms.hpp" -#include "shp/containers/distributed_dense_matrix.hpp" -#include "shp/containers/sparse_matrix.hpp" #include "shp/detail.hpp" #include "shp/distributed_span.hpp" #include "shp/distributed_vector.hpp" @@ -15,6 +13,5 @@ #include "shp/range.hpp" #include "shp/range_adaptors.hpp" #include "shp/util.hpp" -#include "shp/util/matrix_io.hpp" #include "shp/views/views.hpp" #include "views/views.hpp" diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/algorithms.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/algorithms.hpp index a79d46451b3..369896d2d27 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/algorithms.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/algorithms.hpp @@ -11,7 +11,6 @@ #include "for_each.hpp" #include "inclusive_scan.hpp" #include "iota.hpp" -#include "matrix/matrix_algorithms.hpp" #include "reduce.hpp" #include "sort.hpp" #include "transform.hpp" diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp index b4750fa2f5a..c909c60c54c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp @@ -9,20 +9,20 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { // Copy between contiguous ranges template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { - // auto &&q = dr::shp::__detail::default_queue(); + // auto &&q = experimental::dr::shp::__detail::default_queue(); auto &&q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(std::to_address(d_first), std::to_address(first), sizeof(std::iter_value_t) * (last - first)); @@ -41,7 +41,7 @@ OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { template requires __detail::is_syclmemcopyable, T> sycl::event copy_async(Iter first, Iter last, device_ptr d_first) { - // auto &&q = dr::shp::__detail::default_queue(); + // auto &&q = experimental::dr::shp::__detail::default_queue(); auto &&q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(d_first.get_raw_pointer(), std::to_address(first), sizeof(T) * (last - first)); @@ -58,7 +58,7 @@ device_ptr copy(Iter first, Iter last, device_ptr d_first) { template requires __detail::is_syclmemcopyable> sycl::event copy_async(device_ptr first, device_ptr last, Iter d_first) { - // auto &&q = dr::shp::__detail::default_queue(); + // auto &&q = experimental::dr::shp::__detail::default_queue(); auto &&q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(std::to_address(d_first), first.get_raw_pointer(), sizeof(T) * (last - first)); @@ -77,7 +77,7 @@ template sycl::event copy_async(device_ptr> first, device_ptr> last, device_ptr d_first) { - // auto &&q = dr::shp::__detail::default_queue(); + // auto &&q = experimental::dr::shp::__detail::default_queue(); auto &&q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(d_first.get_raw_pointer(), first.get_raw_pointer(), sizeof(T) * (last - first)); @@ -102,11 +102,11 @@ device_ptr copy(device_ptr> first, } // Copy from local range to distributed range -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { - auto &&segments = dr::ranges::segments(d_first); + auto &&segments = experimental::dr::ranges::segments(d_first); auto segment_iter = rng::begin(segments); std::vector events; @@ -121,24 +121,24 @@ sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { rng::advance(local_last, n_to_copy); events.emplace_back( - dr::shp::copy_async(first, local_last, rng::begin(segment))); + experimental::dr::shp::copy_async(first, local_last, rng::begin(segment))); ++segment_iter; rng::advance(first, n_to_copy); } - return dr::shp::__detail::combine_events(events); + return experimental::dr::shp::__detail::combine_events(events); } -auto copy(rng::contiguous_range auto r, dr::distributed_iterator auto d_first) { +auto copy(rng::contiguous_range auto r, experimental::dr::distributed_iterator auto d_first) { return copy(rng::begin(r), rng::end(r), d_first); } -auto copy(dr::distributed_range auto r, std::contiguous_iterator auto d_first) { +auto copy(experimental::dr::distributed_range auto r, std::contiguous_iterator auto d_first) { return copy(rng::begin(r), rng::end(r), d_first); } -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { @@ -147,13 +147,13 @@ OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { } // Copy from distributed range to local range -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { auto dist = rng::distance(first, last); auto segments = - dr::__detail::take_segments(dr::ranges::segments(first), dist); + experimental::dr::__detail::take_segments(experimental::dr::ranges::segments(first), dist); std::vector events; @@ -161,15 +161,15 @@ sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { auto size = rng::distance(segment); events.emplace_back( - dr::shp::copy_async(rng::begin(segment), rng::end(segment), d_first)); + experimental::dr::shp::copy_async(rng::begin(segment), rng::end(segment), d_first)); rng::advance(d_first, size); } - return dr::shp::__detail::combine_events(events); + return experimental::dr::shp::__detail::combine_events(events); } -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { @@ -178,13 +178,13 @@ OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { } // Copy from distributed range to distributed range -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { auto dist = rng::distance(first, last); auto segments = - dr::__detail::take_segments(dr::ranges::segments(first), dist); + experimental::dr::__detail::take_segments(experimental::dr::ranges::segments(first), dist); std::vector events; @@ -192,15 +192,15 @@ sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { auto size = rng::distance(segment); events.emplace_back( - dr::shp::copy_async(rng::begin(segment), rng::end(segment), d_first)); + experimental::dr::shp::copy_async(rng::begin(segment), rng::end(segment), d_first)); rng::advance(d_first, size); } - return dr::shp::__detail::combine_events(events); + return experimental::dr::shp::__detail::combine_events(events); } -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { @@ -211,18 +211,18 @@ OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { // Ranges versions // Distributed to distributed -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(R &&r, O result) { return copy_async(rng::begin(r), rng::end(r), result); } -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> O copy(R &&r, O result) { return copy(rng::begin(r), rng::end(r), result); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp index 5e724499d00..34e59dae3ec 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp @@ -11,19 +11,19 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace dr::shp { - -template +#include +#include +#include +#include +#include +#include +#include +#include + +namespace experimental::dr::shp { + +template void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, BinaryOp &&binary_op) { using T = rng::range_value_t; @@ -31,7 +31,7 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, static_assert( std::is_same_v, device_policy>); - auto zipped_view = dr::shp::views::zip(r, o); + auto zipped_view = experimental::dr::shp::views::zip(r, o); auto zipped_segments = zipped_view.zipped_segments(); if constexpr (std::is_same_v, @@ -49,7 +49,7 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, auto last_element = rng::prev(rng::end(__detail::local(in_segment))); auto dest = d_inits + segment_id; - auto &&q = __detail::queue(dr::ranges::rank(in_segment)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(in_segment)); auto e = q.single_task([=] { *dest = *last_element; }); events.push_back(e); @@ -67,17 +67,17 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, inits[0] = init; - auto root = dr::shp::devices()[0]; - dr::shp::device_allocator allocator(dr::shp::context(), root); - dr::shp::vector> partial_sums( + auto root = experimental::dr::shp::devices()[0]; + experimental::dr::shp::device_allocator allocator(experimental::dr::shp::context(), root); + experimental::dr::shp::vector> partial_sums( std::size_t(zipped_segments.size()), allocator); segment_id = 0; for (auto &&segs : zipped_segments) { auto &&[in_segment, out_segment] = segs; - auto &&q = __detail::queue(dr::ranges::rank(in_segment)); - auto &&local_policy = __detail::dpl_policy(dr::ranges::rank(in_segment)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(in_segment)); + auto &&local_policy = __detail::dpl_policy(experimental::dr::ranges::rank(in_segment)); auto dist = rng::distance(in_segment); assert(dist > 0); @@ -89,13 +89,13 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, auto init = inits[segment_id]; auto event = oneapi::dpl::experimental::exclusive_scan_async( - local_policy, dr::__detail::direct_iterator(first), - dr::__detail::direct_iterator(last), - dr::__detail::direct_iterator(d_first), init, binary_op); + local_policy, experimental::dr::__detail::direct_iterator(first), + experimental::dr::__detail::direct_iterator(last), + experimental::dr::__detail::direct_iterator(d_first), init, binary_op); - auto dst_iter = dr::ranges::local(partial_sums).data() + segment_id; + auto dst_iter = experimental::dr::ranges::local(partial_sums).data() + segment_id; - auto src_iter = dr::ranges::local(out_segment).data(); + auto src_iter = experimental::dr::ranges::local(out_segment).data(); rng::advance(src_iter, dist - 1); auto e = q.submit([&](auto &&h) { @@ -116,7 +116,7 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, auto &&local_policy = __detail::dpl_policy(0); - auto first = dr::ranges::local(partial_sums).data(); + auto first = experimental::dr::ranges::local(partial_sums).data(); auto last = first + partial_sums.size(); oneapi::dpl::experimental::inclusive_scan_async(local_policy, first, last, @@ -128,15 +128,15 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, auto &&[in_segment, out_segment] = segs; if (idx > 0) { - auto &&q = __detail::queue(dr::ranges::rank(out_segment)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(out_segment)); auto first = rng::begin(out_segment); - dr::__detail::direct_iterator d_first(first); + experimental::dr::__detail::direct_iterator d_first(first); auto d_sum = - dr::ranges::__detail::local(partial_sums).begin() + idx - 1; + experimental::dr::ranges::__detail::local(partial_sums).begin() + idx - 1; - sycl::event e = dr::__detail::parallel_for( + sycl::event e = experimental::dr::__detail::parallel_for( q, sycl::range<>(rng::distance(out_segment)), [=](auto idx) { d_first[idx] = binary_op(d_first[idx], *d_sum); }); @@ -154,8 +154,8 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, // Ranges versions -template +template void exclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, T init, BinaryOp &&binary_op) { exclusive_scan_impl_(std::forward(policy), @@ -163,33 +163,33 @@ void exclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, T init, std::forward(binary_op)); } -template +template void exclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, T init) { exclusive_scan_impl_(std::forward(policy), std::forward(r), std::forward(o), init, std::plus<>{}); } -template +template void exclusive_scan(R &&r, O &&o, T init, BinaryOp &&binary_op) { - exclusive_scan_impl_(dr::shp::par_unseq, std::forward(r), + exclusive_scan_impl_(experimental::dr::shp::par_unseq, std::forward(r), std::forward(o), init, std::forward(binary_op)); } -template +template void exclusive_scan(R &&r, O &&o, T init) { - exclusive_scan_impl_(dr::shp::par_unseq, std::forward(r), + exclusive_scan_impl_(experimental::dr::shp::par_unseq, std::forward(r), std::forward(o), init, std::plus<>{}); } // Iterator versions -template +template void exclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, T init, BinaryOp &&binary_op) { auto dist = rng::distance(first, last); @@ -200,26 +200,26 @@ void exclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, rng::subrange(d_first, d_last), init, std::forward(binary_op)); } -template +template void exclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, T init) { exclusive_scan(std::forward(policy), first, last, d_first, init, std::plus<>{}); } -template void exclusive_scan(Iter first, Iter last, OutputIter d_first, T init, BinaryOp &&binary_op) { - exclusive_scan(dr::shp::par_unseq, first, last, d_first, init, + exclusive_scan(experimental::dr::shp::par_unseq, first, last, d_first, init, std::forward(binary_op)); } -template void exclusive_scan(Iter first, Iter last, OutputIter d_first, T init) { - exclusive_scan(dr::shp::par_unseq, first, last, d_first, init); + exclusive_scan(experimental::dr::shp::par_unseq, first, last, d_first, init); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp index 8bdfa00f651..33de94953f7 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { struct device_policy { device_policy(sycl::device device) : devices_({device}) {} @@ -31,4 +31,4 @@ struct device_policy { std::vector devices_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp index a9527c1f1e8..fe287091d38 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp @@ -9,13 +9,13 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { template requires(!std::is_const_v> && @@ -25,7 +25,7 @@ sycl::event fill_async(Iter first, Iter last, auto &&q = __detail::get_queue_for_pointer(first); std::iter_value_t *arr = std::to_address(first); // not using q.fill because of CMPLRLLVM-46438 - return dr::__detail::parallel_for(q, sycl::range<>(last - first), + return experimental::dr::__detail::parallel_for(q, sycl::range<>(last - first), [=](auto idx) { arr[idx] = value; }); } @@ -42,7 +42,7 @@ sycl::event fill_async(device_ptr first, device_ptr last, auto &&q = __detail::get_queue_for_pointer(first); auto *arr = first.get_raw_pointer(); // not using q.fill because of CMPLRLLVM-46438 - return dr::__detail::parallel_for(q, sycl::range<>(last - first), + return experimental::dr::__detail::parallel_for(q, sycl::range<>(last - first), [=](auto idx) { arr[idx] = value; }); } @@ -52,43 +52,43 @@ void fill(device_ptr first, device_ptr last, const U &value) { fill_async(first, last, value).wait(); } -template +template sycl::event fill_async(R &&r, const T &value) { - auto &&q = __detail::queue(dr::ranges::rank(r)); - auto *arr = std::to_address(rng::begin(dr::ranges::local(r))); + auto &&q = __detail::queue(experimental::dr::ranges::rank(r)); + auto *arr = std::to_address(rng::begin(experimental::dr::ranges::local(r))); // not using q.fill because of CMPLRLLVM-46438 - return dr::__detail::parallel_for(q, sycl::range<>(rng::distance(r)), + return experimental::dr::__detail::parallel_for(q, sycl::range<>(rng::distance(r)), [=](auto idx) { arr[idx] = value; }); } -template +template auto fill(R &&r, const T &value) { fill_async(r, value).wait(); return rng::end(r); } -template +template sycl::event fill_async(DR &&r, const T &value) { std::vector events; - for (auto &&segment : dr::ranges::segments(r)) { - auto e = dr::shp::fill_async(segment, value); + for (auto &&segment : experimental::dr::ranges::segments(r)) { + auto e = experimental::dr::shp::fill_async(segment, value); events.push_back(e); } - return dr::shp::__detail::combine_events(events); + return experimental::dr::shp::__detail::combine_events(events); } -template +template auto fill(DR &&r, const T &value) { fill_async(r, value).wait(); return rng::end(r); } -template +template auto fill(Iter first, Iter last, const T &value) { fill_async(rng::subrange(first, last), value).wait(); return last; } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp index 77802a521ff..1135740f933 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp @@ -6,24 +6,24 @@ #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { -template +template void for_each(ExecutionPolicy &&policy, R &&r, Fn &&fn) { static_assert( // currently only one policy supported std::is_same_v, device_policy>); std::vector events; - for (auto &&segment : dr::ranges::segments(r)) { - auto &&q = __detail::queue(dr::ranges::rank(segment)); + for (auto &&segment : experimental::dr::ranges::segments(r)) { + auto &&q = __detail::queue(experimental::dr::ranges::rank(segment)); assert(rng::distance(segment) > 0); @@ -31,7 +31,7 @@ void for_each(ExecutionPolicy &&policy, R &&r, Fn &&fn) { auto first = rng::begin(local_segment); - auto event = dr::__detail::parallel_for( + auto event = experimental::dr::__detail::parallel_for( q, sycl::range<>(rng::distance(local_segment)), [=](auto idx) { fn(*(first + idx)); }); events.emplace_back(event); @@ -39,19 +39,19 @@ void for_each(ExecutionPolicy &&policy, R &&r, Fn &&fn) { __detail::wait(events); } -template +template void for_each(ExecutionPolicy &&policy, Iter begin, Iter end, Fn &&fn) { for_each(std::forward(policy), rng::subrange(begin, end), std::forward(fn)); } -template void for_each(R &&r, Fn &&fn) { - for_each(dr::shp::par_unseq, std::forward(r), std::forward(fn)); +template void for_each(R &&r, Fn &&fn) { + for_each(experimental::dr::shp::par_unseq, std::forward(r), std::forward(fn)); } -template +template void for_each(Iter begin, Iter end, Fn &&fn) { - for_each(dr::shp::par_unseq, begin, end, std::forward(fn)); + for_each(experimental::dr::shp::par_unseq, begin, end, std::forward(fn)); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp index a6cdc828cb1..17c1e4c7c02 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp @@ -13,19 +13,19 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace dr::shp { - -template +#include +#include +#include +#include +#include +#include +#include + +namespace experimental::dr::shp { + +template > void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, BinaryOp &&binary_op, std::optional init = {}) { @@ -34,7 +34,7 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, static_assert( std::is_same_v, device_policy>); - auto zipped_view = dr::shp::views::zip(r, o); + auto zipped_view = experimental::dr::shp::views::zip(r, o); auto zipped_segments = zipped_view.zipped_segments(); if constexpr (std::is_same_v, @@ -42,17 +42,17 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, std::vector events; - auto root = dr::shp::devices()[0]; - dr::shp::device_allocator allocator(dr::shp::context(), root); - dr::shp::vector> partial_sums( + auto root = experimental::dr::shp::devices()[0]; + experimental::dr::shp::device_allocator allocator(experimental::dr::shp::context(), root); + experimental::dr::shp::vector> partial_sums( std::size_t(zipped_segments.size()), allocator); std::size_t segment_id = 0; for (auto &&segs : zipped_segments) { auto &&[in_segment, out_segment] = segs; - auto &&q = __detail::queue(dr::ranges::rank(in_segment)); - auto &&local_policy = __detail::dpl_policy(dr::ranges::rank(in_segment)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(in_segment)); + auto &&local_policy = __detail::dpl_policy(experimental::dr::ranges::rank(in_segment)); auto dist = rng::distance(in_segment); assert(dist > 0); @@ -65,19 +65,19 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, if (segment_id == 0 && init.has_value()) { event = oneapi::dpl::experimental::inclusive_scan_async( - local_policy, dr::__detail::direct_iterator(first), - dr::__detail::direct_iterator(last), - dr::__detail::direct_iterator(d_first), binary_op, init.value()); + local_policy, experimental::dr::__detail::direct_iterator(first), + experimental::dr::__detail::direct_iterator(last), + experimental::dr::__detail::direct_iterator(d_first), binary_op, init.value()); } else { event = oneapi::dpl::experimental::inclusive_scan_async( - local_policy, dr::__detail::direct_iterator(first), - dr::__detail::direct_iterator(last), - dr::__detail::direct_iterator(d_first), binary_op); + local_policy, experimental::dr::__detail::direct_iterator(first), + experimental::dr::__detail::direct_iterator(last), + experimental::dr::__detail::direct_iterator(d_first), binary_op); } - auto dst_iter = dr::ranges::local(partial_sums).data() + segment_id; + auto dst_iter = experimental::dr::ranges::local(partial_sums).data() + segment_id; - auto src_iter = dr::ranges::local(out_segment).data(); + auto src_iter = experimental::dr::ranges::local(out_segment).data(); rng::advance(src_iter, dist - 1); auto e = q.submit([&](auto &&h) { @@ -98,7 +98,7 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, auto &&local_policy = __detail::dpl_policy(0); - auto first = dr::ranges::local(partial_sums).data(); + auto first = experimental::dr::ranges::local(partial_sums).data(); auto last = first + partial_sums.size(); oneapi::dpl::experimental::inclusive_scan_async(local_policy, first, last, @@ -110,15 +110,15 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, auto &&[in_segment, out_segment] = segs; if (idx > 0) { - auto &&q = __detail::queue(dr::ranges::rank(out_segment)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(out_segment)); auto first = rng::begin(out_segment); - dr::__detail::direct_iterator d_first(first); + experimental::dr::__detail::direct_iterator d_first(first); auto d_sum = - dr::ranges::__detail::local(partial_sums).begin() + idx - 1; + experimental::dr::ranges::__detail::local(partial_sums).begin() + idx - 1; - sycl::event e = dr::__detail::parallel_for( + sycl::event e = experimental::dr::__detail::parallel_for( q, sycl::range<>(rng::distance(out_segment)), [=](auto idx) { d_first[idx] = binary_op(d_first[idx], *d_sum); }); @@ -134,8 +134,8 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, } } -template +template void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, BinaryOp &&binary_op, T init) { inclusive_scan_impl_(std::forward(policy), @@ -143,8 +143,8 @@ void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, std::forward(binary_op), std::optional(init)); } -template +template void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, BinaryOp &&binary_op) { inclusive_scan_impl_(std::forward(policy), @@ -152,8 +152,8 @@ void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, std::forward(binary_op)); } -template +template void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o) { inclusive_scan(std::forward(policy), std::forward(r), std::forward(o), std::plus>()); @@ -161,8 +161,8 @@ void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o) { // Distributed iterator versions -template +template OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op, T init) { @@ -176,8 +176,8 @@ OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, return d_last; } -template +template OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op) { @@ -191,8 +191,8 @@ OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, return d_last; } -template +template OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first) { auto dist = rng::distance(first, last); @@ -206,47 +206,47 @@ OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, // Execution policy-less versions -template +template void inclusive_scan(R &&r, O &&o) { - inclusive_scan(dr::shp::par_unseq, std::forward(r), std::forward(o)); + inclusive_scan(experimental::dr::shp::par_unseq, std::forward(r), std::forward(o)); } -template +template void inclusive_scan(R &&r, O &&o, BinaryOp &&binary_op) { - inclusive_scan(dr::shp::par_unseq, std::forward(r), std::forward(o), + inclusive_scan(experimental::dr::shp::par_unseq, std::forward(r), std::forward(o), std::forward(binary_op)); } -template +template void inclusive_scan(R &&r, O &&o, BinaryOp &&binary_op, T init) { - inclusive_scan(dr::shp::par_unseq, std::forward(r), std::forward(o), + inclusive_scan(experimental::dr::shp::par_unseq, std::forward(r), std::forward(o), std::forward(binary_op), init); } // Distributed iterator versions -template +template OutputIter inclusive_scan(Iter first, Iter last, OutputIter d_first) { - return inclusive_scan(dr::shp::par_unseq, first, last, d_first); + return inclusive_scan(experimental::dr::shp::par_unseq, first, last, d_first); } -template OutputIter inclusive_scan(Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op) { - return inclusive_scan(dr::shp::par_unseq, first, last, d_first, + return inclusive_scan(experimental::dr::shp::par_unseq, first, last, d_first, std::forward(binary_op)); } -template OutputIter inclusive_scan(Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op, T init) { - return inclusive_scan(dr::shp::par_unseq, first, last, d_first, + return inclusive_scan(experimental::dr::shp::par_unseq, first, last, d_first, std::forward(binary_op), init); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp index 23641501583..01ebc163ce6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp @@ -6,14 +6,14 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { -template void iota(R &&r, T value) { +template void iota(R &&r, T value) { auto iota_view = rng::views::iota(value, T(value + rng::distance(r))); for_each(par_unseq, views::zip(iota_view, r), [](auto &&elem) { @@ -22,10 +22,10 @@ template void iota(R &&r, T value) { }); } -template +template void iota(Iter begin, Iter end, T value) { auto r = rng::subrange(begin, end); iota(r, value); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp deleted file mode 100644 index 9ff6f650284..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp +++ /dev/null @@ -1,245 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include - -namespace dr::shp { - -template -void gemm(distributed_dense_matrix &a, distributed_dense_matrix &b, - distributed_dense_matrix &c) { - gemm_buffered(a, b, c); -} - -template -void gemm_inplace(distributed_dense_matrix &a, - distributed_dense_matrix &b, - distributed_dense_matrix &c) { - // Matrix dimensions must match (algorithm requirement) - assert(c.shape()[0] == a.shape()[0]); - assert(c.shape()[1] == b.shape()[1]); - assert(a.shape()[1] == b.shape()[0]); - - // Tile grid dimensions must match (implementation limitation) - - assert(c.grid_shape()[0] == a.grid_shape()[0]); - assert(c.grid_shape()[1] == b.grid_shape()[1]); - assert(a.grid_shape()[1] == b.grid_shape()[0]); - - std::vector events; - events.reserve(c.grid_shape()[0] * c.grid_shape()[1] * a.grid_shape()[1]); - - for (std::size_t i = 0; i < c.grid_shape()[0]; i++) { - for (std::size_t j = 0; j < c.grid_shape()[1]; j++) { - // For each tile of the output C matrix - auto &&c_tile = c.tile({i, j}); - - std::vector local_events; - local_events.reserve(a.grid_shape()[1]); - - std::size_t k_offset = i + j; - for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { - std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - - auto &&a_tile = a.tile({i, k}); - auto &&b_tile = b.tile({k, j}); - - auto &&q = __detail::queue(dr::ranges::rank(c_tile)); - - auto e = __detail::local_gemm(q, __detail::local(a_tile), - __detail::local(b_tile), - __detail::local(c_tile), local_events); - - local_events.push_back(e); - } - - for (auto &&e : local_events) { - events.push_back(e); - } - } - } - - __detail::wait(events); -} - -template -void gemm_buffered(distributed_dense_matrix &a, - distributed_dense_matrix &b, - distributed_dense_matrix &c) { - // Matrix dimensions must match (algorithm requirement) - assert(c.shape()[0] == a.shape()[0]); - assert(c.shape()[1] == b.shape()[1]); - assert(a.shape()[1] == b.shape()[0]); - - // Tile grid dimensions must match (implementation limitation) - - assert(c.grid_shape()[0] == a.grid_shape()[0]); - assert(c.grid_shape()[1] == b.grid_shape()[1]); - assert(a.grid_shape()[1] == b.grid_shape()[0]); - - std::vector threads; - - std::atomic communication = 0; - std::atomic compute = 0; - - for (std::size_t i = 0; i < c.grid_shape()[0]; i++) { - for (std::size_t j = 0; j < c.grid_shape()[1]; j++) { - auto c_local = c.tile({i, j}); - - threads.emplace_back([c_local, i, j, &a, &b, &communication, &compute] { - auto &&q = __detail::queue(dr::ranges::rank(c_local)); - - std::size_t a_elem = a.tile_shape()[0] * a.tile_shape()[1]; - std::size_t b_elem = b.tile_shape()[0] * b.tile_shape()[1]; - std::size_t buffer_size = std::max(a_elem, b_elem); - - dr::shp::device_allocator gpu_allocator(q); - dr::shp::buffered_allocator buffered_allocator(gpu_allocator, - buffer_size, 2); - auto &&allocator = buffered_allocator; - - std::size_t k_offset = i + j; - - for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { - std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - - auto begin = std::chrono::high_resolution_clock::now(); - auto a_tile = a.get_tile({i, k}, allocator); - auto b_tile = b.get_tile({k, j}, allocator); - auto end = std::chrono::high_resolution_clock::now(); - double duration = std::chrono::duration(end - begin).count(); - communication += duration; - - dr::shp::dense_matrix_view a_local(a_tile); - dr::shp::dense_matrix_view b_local(b_tile); - - begin = std::chrono::high_resolution_clock::now(); - __detail::local_gemm(q, __detail::local(a_local), - __detail::local(b_local), - __detail::local(c_local)) - .wait(); - end = std::chrono::high_resolution_clock::now(); - duration = std::chrono::duration(end - begin).count(); - compute += duration; - } - }); - } - } - - for (auto &&t : threads) { - t.join(); - } - - bool debug_print = false; - - if (debug_print) { - std::cout << "communication total: " << (double)communication << std::endl; - std::cout << "compute total: " << (double)compute << std::endl; - } -} - -template -void gemm_buffered_async(distributed_dense_matrix &a, - distributed_dense_matrix &b, - distributed_dense_matrix &c) { - // Matrix dimensions must match (algorithm requirement) - assert(c.shape()[0] == a.shape()[0]); - assert(c.shape()[1] == b.shape()[1]); - assert(a.shape()[1] == b.shape()[0]); - - // Tile grid dimensions must match (implementation limitation) - - assert(c.grid_shape()[0] == a.grid_shape()[0]); - assert(c.grid_shape()[1] == b.grid_shape()[1]); - assert(a.grid_shape()[1] == b.grid_shape()[0]); - - std::vector threads; - - std::atomic issue = 0; - std::atomic sync = 0; - std::atomic compute = 0; - - for (std::size_t i = 0; i < c.grid_shape()[0]; i++) { - for (std::size_t j = 0; j < c.grid_shape()[1]; j++) { - auto c_local = c.tile({i, j}); - - threads.emplace_back([c_local, i, j, &a, &b, &issue, &sync, &compute] { - auto &&q = __detail::queue(dr::ranges::rank(c_local)); - - std::size_t a_elem = a.tile_shape()[0] * a.tile_shape()[1]; - std::size_t b_elem = b.tile_shape()[0] * b.tile_shape()[1]; - std::size_t buffer_size = std::max(a_elem, b_elem); - - dr::shp::device_allocator gpu_allocator(q); - dr::shp::buffered_allocator buffered_allocator(gpu_allocator, - buffer_size, 4); - auto &&allocator = buffered_allocator; - - std::size_t k_offset = i + j; - - auto begin = std::chrono::high_resolution_clock::now(); - auto a_f = - a.get_tile_async({i, k_offset % a.grid_shape()[1]}, allocator); - // a_f.wait(); - auto b_f = - b.get_tile_async({k_offset % a.grid_shape()[1], j}, allocator); - // b_f.wait(); - auto end = std::chrono::high_resolution_clock::now(); - double duration = std::chrono::duration(end - begin).count(); - issue += duration; - - for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { - std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - - auto begin = std::chrono::high_resolution_clock::now(); - auto a_tile = a_f.get(); - auto b_tile = b_f.get(); - auto end = std::chrono::high_resolution_clock::now(); - double duration = std::chrono::duration(end - begin).count(); - sync += duration; - - dr::shp::dense_matrix_view a_local(a_tile); - dr::shp::dense_matrix_view b_local(b_tile); - - if (k_ + 1 < a.grid_shape()[1]) { - begin = std::chrono::high_resolution_clock::now(); - a_f = a.get_tile_async({i, (k + 1) % a.grid_shape()[1]}, allocator); - // a_f.wait(); - b_f = b.get_tile_async({(k + 1) % a.grid_shape()[1], j}, allocator); - // b_f.wait(); - end = std::chrono::high_resolution_clock::now(); - duration = std::chrono::duration(end - begin).count(); - issue += duration; - } - - begin = std::chrono::high_resolution_clock::now(); - __detail::local_gemm(q, __detail::local(a_local), - __detail::local(b_local), - __detail::local(c_local)) - .wait(); - end = std::chrono::high_resolution_clock::now(); - duration = std::chrono::duration(end - begin).count(); - compute += duration; - } - }); - } - } - - for (auto &&t : threads) { - t.join(); - } - - bool debug_print = false; - - if (debug_print) { - std::cout << "sync total: " << (double)sync << std::endl; - std::cout << "issue total: " << (double)issue << std::endl; - std::cout << "compute total: " << (double)compute << std::endl; - } -} - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp deleted file mode 100644 index 5f1ceb6c9e9..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp +++ /dev/null @@ -1,208 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace dr::shp { - -template -void flat_gemv(C &&c, dr::shp::sparse_matrix &a, B &&b) { - assert(c.size() == b.size()); - assert(a.shape()[1] == b.size()); - assert(a.grid_shape()[0] == c.segments().size()); - assert(a.grid_shape()[1] == 1); - - auto &&devices = dr::shp::devices(); - - using b_scalar_type = rng::range_value_t; - - using local_vector_type = - dr::shp::device_vector>; - - std::vector local_b; - std::vector copy_events; - std::vector comp_events; - - for (std::size_t i = 0; i < devices.size(); i++) { - dr::shp::device_allocator allocator(dr::shp::context(), devices[i]); - local_b.push_back(local_vector_type(b.size(), allocator, i)); - } - - for (auto &&l_b : local_b) { - auto event = - dr::shp::copy_async(b.begin(), b.end(), dr::ranges::local(l_b.begin())); - copy_events.push_back(event); - } - - for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - auto a_tile = a.tile(dr::index(i, 0)); - - auto a_iter = a_tile.begin(); - auto b_iter = dr::ranges::local(local_b[i].begin()); - auto c_iter = dr::ranges::local(c.segments()[i].begin()); - - auto &&q = __detail::queue(a_tile.rank()); - - auto event = q.submit([&](auto &&h) { - h.depends_on(copy_events[a_tile.rank()]); - h.parallel_for(a_tile.size(), [=](auto idx) { - auto &&[index, a_v] = *(a_iter + idx); - auto &&[i, k] = index; - auto &&b_v = *(b_iter + k); - auto &&c_v = *(c_iter + i); - sycl::atomic_ref - c_ref(c_v); - c_ref += a_v * b_v; - }); - }); - comp_events.push_back(event); - } - - __detail::wait(comp_events); -} - -template -void gemv(C &&c, dr::shp::sparse_matrix &a, B &&b, - shp::duplicated_vector> &scratch) { - assert(c.size() == b.size()); - assert(a.shape()[1] == b.size()); - assert(a.grid_shape()[0] == c.segments().size()); - assert(a.grid_shape()[1] == 1); - - auto &&b_duplicated = scratch; - - std::vector copy_events; - std::vector comp_events; - copy_events.reserve(shp::nprocs()); - comp_events.reserve(a.grid_shape()[0]); - - for (std::size_t i = 0; i < shp::nprocs(); i++) { - auto &&l_b = b_duplicated.local_vector(i); - auto event = dr::shp::copy_async(b.begin(), b.end(), l_b.begin()); - copy_events.push_back(event); - } - - for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - auto a_tile = a.tile(dr::index(i, 0)); - - auto b_iter = - dr::ranges::local(b_duplicated.local_vector(a_tile.rank()).begin()); - auto c_iter = dr::ranges::local(c.segments()[i].begin()); - - auto &&q = __detail::queue(a_tile.rank()); - - auto event = __detail::local_gemv(q, a_tile, b_iter, c_iter, - {copy_events[a_tile.rank()]}); - comp_events.push_back(event); - } - - __detail::wait(comp_events); -} - -template -void gemv(C &&c, dr::shp::sparse_matrix &a, B &&b) { - dr::shp::duplicated_vector> b_duplicated(b.size()); - - gemv(c, a, b, b_duplicated); -} - -template -void gemv_square(C &&c, dr::shp::sparse_matrix &a, B &&b) { - assert(a.shape()[0] == c.size()); - assert(a.shape()[1] == b.size()); - assert(a.grid_shape()[0] == c.segments().size()); - assert(a.grid_shape()[1] == b.segments().size()); - - std::vector events; - - for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - std::size_t k_offset = i; - for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { - std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - auto a_tile = a.tile(dr::index(i, k)); - auto b_segment = b.segments()[k]; - auto c_segment = c.segments()[i]; - - auto b_iter = dr::ranges::local(b_segment.begin()); - auto c_iter = dr::ranges::local(c_segment.begin()); - - auto &&q = __detail::queue(a_tile.rank()); - - auto event = __detail::custom_gemv(q, a_tile, b_iter, c_iter); - events.push_back(event); - } - } - - __detail::wait(events); -} - -template -void gemv_square_copy(C &&c, dr::shp::sparse_matrix &a, B &&b) { - assert(a.shape()[0] == c.size()); - assert(a.shape()[1] == b.size()); - assert(a.grid_shape()[0] == c.segments().size()); - assert(a.grid_shape()[1] == b.segments().size()); - - auto &&devices = dr::shp::devices(); - - using b_scalar_type = rng::range_value_t; - - using local_vector_type = - dr::shp::device_vector>; - - std::vector local_b; - std::vector events; - - local_b.reserve(a.grid_shape()[0]); - - for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - dr::shp::device_allocator allocator( - dr::shp::context(), devices[a.tile(dr::index(i, 0)).rank()]); - local_b.emplace_back(b.size(), allocator, - a.tile(dr::index(i, 0)).rank()); - } - - for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - std::size_t k_offset = i; - for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { - std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - auto a_tile = a.tile({i, k}); - auto b_iter = local_b[i].begin() + (k * a.tile_shape()[1]); - auto c_iter = c.segments()[i].begin(); - - auto &&b_segment = b.segments()[k]; - auto &&q = __detail::queue(a_tile.rank()); - - auto ce = - dr::shp::copy_async(q, b_segment.begin(), b_segment.end(), b_iter); - - auto event = __detail::custom_gemv(q, a_tile, b_iter.local(), - c_iter.local(), {ce}); - - events.push_back(event); - } - } - - __detail::wait(events); -} - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp deleted file mode 100644 index 1d049667a5c..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -#ifdef USE_MKL -#include -#endif - -namespace dr::shp { - -namespace __detail { - -template -auto custom_gemm(sycl::queue &q, shp::dense_matrix_view a, - shp::dense_matrix_view b, shp::dense_matrix_view c, - const std::vector &dependencies = {}) { - assert(c.shape()[0] == a.shape()[0]); - assert(c.shape()[1] == b.shape()[1]); - assert(a.shape()[1] == b.shape()[0]); - - std::size_t M = c.shape()[0]; - std::size_t N = c.shape()[1]; - std::size_t K = a.shape()[1]; - - auto a_p = a.data(); - auto b_p = b.data(); - auto c_p = c.data(); - - auto e = q.parallel_for(sycl::range<3>{M, K, N}, [=](auto idx) { - auto i = idx[0]; - auto k = idx[1]; - auto j = idx[2]; - - sycl::atomic_ref - c_ref(c_p[i * N + j]); - - c_ref += a_p[i * K + k] * b_p[k * N + j]; - }); - return e; -} - -#ifdef USE_MKL - -template -auto mkl_gemm(sycl::queue &q, shp::dense_matrix_view a, - shp::dense_matrix_view b, shp::dense_matrix_view c, - const std::vector &dependencies = {}) { - assert(c.shape()[0] == a.shape()[0]); - assert(c.shape()[1] == b.shape()[1]); - assert(a.shape()[1] == b.shape()[0]); - - auto event = oneapi::mkl::blas::row_major::gemm( - q, oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, - c.shape()[0], c.shape()[1], a.shape()[1], T(1), a.data(), a.ld(), - b.data(), b.ld(), T(1), c.data(), c.ld(), dependencies); - - return event; -} - -template -auto local_gemm(sycl::queue &q, shp::dense_matrix_view a, - shp::dense_matrix_view b, shp::dense_matrix_view c, - const std::vector &dependencies = {}) { - return mkl_gemm(q, a, b, c, dependencies); -} - -#else - -template -auto local_gemm(sycl::queue &q, shp::dense_matrix_view a, - shp::dense_matrix_view b, shp::dense_matrix_view c, - const std::vector &dependencies = {}) { - return custom_gemm(q, a, b, c, dependencies); -} - -#endif - -} // namespace __detail - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp deleted file mode 100644 index 925b44f9b6d..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp +++ /dev/null @@ -1,100 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -#ifdef USE_MKL -#include -#endif - -namespace dr::shp { - -namespace __detail { - -template - requires(std::is_same_v, T>) -auto custom_gemv(sycl::queue &q, csr_matrix_view a, Iter b, - Iter c, const std::vector &dependencies = {}) { - std::size_t wg = 32; - - auto event = q.submit([&](auto &&h) { - h.depends_on(dependencies); - h.parallel_for(sycl::nd_range<1>(a.shape()[0] * wg, wg), [=](auto item) { - auto row_index = item.get_group(0); - auto local_id = item.get_local_id(); - auto group_size = item.get_local_range(0); - - auto row = a.row(row_index); - - for (std::size_t idx = local_id; idx < row.size(); idx += group_size) { - auto &&[index, a_v] = row[idx]; - auto &&[i, k] = index; - - auto &&b_v = *(b + k); - auto &&c_v = *(c + i); - - sycl::atomic_ref - c_ref(c_v); - - c_ref += a_v * b_v; - } - }); - }); - return event; -} - -#ifdef USE_MKL - -template - requires(std::is_same_v, T>) -auto mkl_gemv(sycl::queue &q, csr_matrix_view a, Iter b, Iter c, - const std::vector &dependencies = {}) { - - oneapi::mkl::sparse::matrix_handle_t a_handle; - oneapi::mkl::sparse::init_matrix_handle(&a_handle); - - auto rowptr = dr::shp::__detail::local(a.rowptr_data()); - auto colind = dr::shp::__detail::local(a.colind_data()); - auto values = dr::shp::__detail::local(a.values_data()); - - oneapi::mkl::sparse::set_csr_data(q, a_handle, a.shape()[0], a.shape()[1], - oneapi::mkl::index_base::zero, rowptr, - colind, values); - - auto event = - oneapi::mkl::sparse::gemv(q, oneapi::mkl::transpose::nontrans, T(1), - a_handle, b, T(1), c, dependencies); - return event; -} - -template - requires(std::is_same_v, T>) -auto local_gemv(sycl::queue &q, csr_matrix_view a, Iter b, - Iter c, const std::vector &dependencies = {}) { - return mkl_gemv(q, a, b, c, dependencies); -} - -#else - -template - requires(std::is_same_v, T>) -auto local_gemv(sycl::queue &q, csr_matrix_view a, Iter b, - Iter c, const std::vector &dependencies = {}) { - return custom_gemv(q, a, b, c, dependencies); -} - -#endif - -} // namespace __detail - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp deleted file mode 100644 index 040b2568522..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp index 04df2fc677b..96f1f4c91dd 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp @@ -9,10 +9,10 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include #include namespace { @@ -28,8 +28,8 @@ auto reduce_no_init_async(ExecutionPolicy &&policy, Iter first, Iter last, std::iter_value_t init = *new_last; - dr::__detail::direct_iterator d_first(first); - dr::__detail::direct_iterator d_last(new_last); + experimental::dr::__detail::direct_iterator d_first(first); + experimental::dr::__detail::direct_iterator d_last(new_last); return oneapi::dpl::experimental::reduce_async( std::forward(policy), d_first, d_last, @@ -41,8 +41,8 @@ template ) auto reduce_no_init_async(ExecutionPolicy &&policy, Iter first, Iter last, Fn &&fn) { - dr::__detail::direct_iterator d_first(first); - dr::__detail::direct_iterator d_last(last); + experimental::dr::__detail::direct_iterator d_first(first); + experimental::dr::__detail::direct_iterator d_last(last); return oneapi::dpl::experimental::reduce_async( std::forward(policy), d_first, d_last, @@ -51,9 +51,9 @@ auto reduce_no_init_async(ExecutionPolicy &&policy, Iter first, Iter last, } // namespace -namespace dr::shp { +namespace experimental::dr::shp { -template T reduce(ExecutionPolicy &&policy, R &&r, T init, BinaryOp &&binary_op) { @@ -63,13 +63,13 @@ T reduce(ExecutionPolicy &&policy, R &&r, T init, BinaryOp &&binary_op) { if constexpr (std::is_same_v, device_policy>) { using future_t = decltype(oneapi::dpl::experimental::reduce_async( - __detail::dpl_policy(0), dr::ranges::segments(r)[0].begin(), - dr::ranges::segments(r)[0].end(), init, binary_op)); + __detail::dpl_policy(0), experimental::dr::ranges::segments(r)[0].begin(), + experimental::dr::ranges::segments(r)[0].end(), init, binary_op)); std::vector futures; - for (auto &&segment : dr::ranges::segments(r)) { - auto &&local_policy = __detail::dpl_policy(dr::ranges::rank(segment)); + for (auto &&segment : experimental::dr::ranges::segments(r)) { + auto &&local_policy = __detail::dpl_policy(experimental::dr::ranges::rank(segment)); auto dist = rng::distance(segment); if (dist <= 0) { @@ -95,13 +95,13 @@ T reduce(ExecutionPolicy &&policy, R &&r, T init, BinaryOp &&binary_op) { } } -template +template T reduce(ExecutionPolicy &&policy, R &&r, T init) { return reduce(std::forward(policy), std::forward(r), init, std::plus<>()); } -template +template rng::range_value_t reduce(ExecutionPolicy &&policy, R &&r) { return reduce(std::forward(policy), std::forward(r), rng::range_value_t{}, std::plus<>()); @@ -109,7 +109,7 @@ rng::range_value_t reduce(ExecutionPolicy &&policy, R &&r) { // Iterator versions -template +template std::iter_value_t reduce(ExecutionPolicy &&policy, Iter first, Iter last) { return reduce(std::forward(policy), @@ -117,13 +117,13 @@ std::iter_value_t reduce(ExecutionPolicy &&policy, Iter first, std::plus<>()); } -template +template T reduce(ExecutionPolicy &&policy, Iter first, Iter last, T init) { return reduce(std::forward(policy), rng::subrange(first, last), init, std::plus<>()); } -template T reduce(ExecutionPolicy &&policy, Iter first, Iter last, T init, BinaryOp &&binary_op) { @@ -134,34 +134,34 @@ T reduce(ExecutionPolicy &&policy, Iter first, Iter last, T init, // Execution policy-less algorithms -template rng::range_value_t reduce(R &&r) { - return reduce(dr::shp::par_unseq, std::forward(r)); +template rng::range_value_t reduce(R &&r) { + return reduce(experimental::dr::shp::par_unseq, std::forward(r)); } -template T reduce(R &&r, T init) { - return reduce(dr::shp::par_unseq, std::forward(r), init); +template T reduce(R &&r, T init) { + return reduce(experimental::dr::shp::par_unseq, std::forward(r), init); } -template +template T reduce(R &&r, T init, BinaryOp &&binary_op) { - return reduce(dr::shp::par_unseq, std::forward(r), init, + return reduce(experimental::dr::shp::par_unseq, std::forward(r), init, std::forward(binary_op)); } -template +template std::iter_value_t reduce(Iter first, Iter last) { - return reduce(dr::shp::par_unseq, first, last); + return reduce(experimental::dr::shp::par_unseq, first, last); } -template +template T reduce(Iter first, Iter last, T init) { - return reduce(dr::shp::par_unseq, first, last, init); + return reduce(experimental::dr::shp::par_unseq, first, last, init); } -template +template T reduce(Iter first, Iter last, T init, BinaryOp &&binary_op) { - return reduce(dr::shp::par_unseq, first, last, init, + return reduce(experimental::dr::shp::par_unseq, first, last, init, std::forward(binary_op)); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp index 7fb0b516e50..0790536ab92 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp @@ -9,12 +9,12 @@ #include #include -#include -#include -#include +#include +#include +#include #include -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { @@ -22,8 +22,8 @@ template sycl::event sort_async(LocalPolicy &&policy, InputIt first, InputIt last, Compare &&comp) { if (rng::distance(first, last) >= 2) { - dr::__detail::direct_iterator d_first(first); - dr::__detail::direct_iterator d_last(last); + experimental::dr::__detail::direct_iterator d_first(first); + experimental::dr::__detail::direct_iterator d_last(last); return oneapi::dpl::experimental::sort_async( std::forward(policy), d_first, d_last, std::forward(comp)); @@ -37,13 +37,13 @@ template (policy), d_start, d_end, d_value_first, d_value_last, d_result, @@ -53,17 +53,17 @@ OutputIt lower_bound(LocalPolicy &&policy, InputIt1 start, InputIt1 end, } // namespace __detail -template > +template > void sort(R &&r, Compare comp = Compare()) { - auto &&segments = dr::ranges::segments(r); + auto &&segments = experimental::dr::ranges::segments(r); if (rng::size(segments) == 0) { return; } else if (rng::size(segments) == 1) { auto &&segment = *rng::begin(segments); auto &&local_policy = - dr::shp::__detail::dpl_policy(dr::ranges::rank(segment)); - auto &&local_segment = dr::shp::__detail::local(segment); + experimental::dr::shp::__detail::dpl_policy(experimental::dr::ranges::rank(segment)); + auto &&local_segment = experimental::dr::shp::__detail::local(segment); __detail::sort_async(local_policy, rng::begin(local_segment), rng::end(local_segment), comp) @@ -86,11 +86,11 @@ void sort(R &&r, Compare comp = Compare()) { std::size_t segment_id = 0; for (auto &&segment : segments) { - auto &&q = dr::shp::__detail::queue(dr::ranges::rank(segment)); + auto &&q = experimental::dr::shp::__detail::queue(experimental::dr::ranges::rank(segment)); auto &&local_policy = - dr::shp::__detail::dpl_policy(dr::ranges::rank(segment)); + experimental::dr::shp::__detail::dpl_policy(experimental::dr::ranges::rank(segment)); - auto &&local_segment = dr::shp::__detail::local(segment); + auto &&local_segment = experimental::dr::shp::__detail::local(segment); auto s = __detail::sort_async(local_policy, rng::begin(local_segment), rng::end(local_segment), comp); @@ -112,12 +112,12 @@ void sort(R &&r, Compare comp = Compare()) { ++segment_id; } - dr::shp::__detail::wait(events); + experimental::dr::shp::__detail::wait(events); events.clear(); // Compute global medians by sorting medians and // computing `n_splitters` medians from the medians. - auto &&local_policy = dr::shp::__detail::dpl_policy(0); + auto &&local_policy = experimental::dr::shp::__detail::dpl_policy(0); __detail::sort_async(local_policy, medians, medians + n_segments * n_splitters, comp) .wait(); @@ -127,7 +127,7 @@ void sort(R &&r, Compare comp = Compare()) { // - Collect median of medians to get final splitters. // - Write splitters to [0, n_splitters) in `medians` - auto &&q = dr::shp::__detail::queue(0); + auto &&q = experimental::dr::shp::__detail::queue(0); q.single_task([=] { for (std::size_t i = 0; i < n_splitters; i++) { medians[i] = medians[std::size_t(step_size * (i + 1) + 0.5)]; @@ -144,11 +144,11 @@ void sort(R &&r, Compare comp = Compare()) { segment_id = 0; for (auto &&segment : segments) { - auto &&q = dr::shp::__detail::queue(dr::ranges::rank(segment)); + auto &&q = experimental::dr::shp::__detail::queue(experimental::dr::ranges::rank(segment)); auto &&local_policy = - dr::shp::__detail::dpl_policy(dr::ranges::rank(segment)); + experimental::dr::shp::__detail::dpl_policy(experimental::dr::ranges::rank(segment)); - auto &&local_segment = dr::shp::__detail::local(segment); + auto &&local_segment = experimental::dr::shp::__detail::local(segment); std::size_t *splitter_i = sycl::malloc_shared( n_splitters, q.get_device(), shp::context()); @@ -194,7 +194,7 @@ void sort(R &&r, Compare comp = Compare()) { segment_id = 0; for (auto &&segment : segments) { - auto &&q = dr::shp::__detail::queue(dr::ranges::rank(segment)); + auto &&q = experimental::dr::shp::__detail::queue(experimental::dr::ranges::rank(segment)); T *buffer = sycl::malloc_device(sorted_seg_sizes[segment_id], q); sorted_segments.push_back(buffer); @@ -205,7 +205,7 @@ void sort(R &&r, Compare comp = Compare()) { // Copy corresponding elements to each "sorted segment" segment_id = 0; for (auto &&segment : segments) { - auto &&local_segment = dr::shp::__detail::local(segment); + auto &&local_segment = experimental::dr::shp::__detail::local(segment); std::size_t *splitter_i = splitter_indices[segment_id]; @@ -232,13 +232,13 @@ void sort(R &&r, Compare comp = Compare()) { ++segment_id; } - dr::shp::__detail::wait(events); + experimental::dr::shp::__detail::wait(events); events.clear(); // Sort each of these new segments for (std::size_t i = 0; i < sorted_segments.size(); i++) { auto &&local_policy = - dr::shp::__detail::dpl_policy(dr::ranges::rank(segments[i])); + experimental::dr::shp::__detail::dpl_policy(experimental::dr::ranges::rank(segments[i])); T *seg = sorted_segments[i]; std::size_t n_elements = sorted_seg_sizes[i]; @@ -247,7 +247,7 @@ void sort(R &&r, Compare comp = Compare()) { events.push_back(e); } - dr::shp::__detail::wait(events); + experimental::dr::shp::__detail::wait(events); events.clear(); // Copy the results into the output. @@ -265,7 +265,7 @@ void sort(R &&r, Compare comp = Compare()) { rng::advance(d_first, n_elements); } - dr::shp::__detail::wait(events); + experimental::dr::shp::__detail::wait(events); // Free temporary memory. @@ -280,9 +280,9 @@ void sort(R &&r, Compare comp = Compare()) { sycl::free(medians, shp::context()); } -template > +template > void sort(RandomIt first, RandomIt last, Compare comp = Compare()) { sort(rng::subrange(first, last), comp); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp index 58aecae19b5..104c0e761a1 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp @@ -3,16 +3,16 @@ // SPDX-License-Identifier: BSD-3-Clause #pragma once -#include -#include -#include +#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { /** * Applies the given function to a range and stores the result in another range, * beginning at out. - * \param policy use `dr::shp::par_unseq` here only + * \param policy use `experimental::dr::shp::par_unseq` here only * \param in the range of elements to transform * \param out the beginning of the destination range, may be equal to the * beginning of `in` range \param fn operation to apply to input elements @@ -23,8 +23,8 @@ namespace dr::shp { */ template -auto transform(ExecutionPolicy &&policy, dr::distributed_range auto &&in, - dr::distributed_iterator auto out, auto &&fn) { +auto transform(ExecutionPolicy &&policy, experimental::dr::distributed_range auto &&in, + experimental::dr::distributed_iterator auto out, auto &&fn) { static_assert( // currently only one policy supported std::is_same_v, device_policy>); @@ -37,7 +37,7 @@ auto transform(ExecutionPolicy &&policy, dr::distributed_range auto &&in, for (auto &&[in_seg, out_seg] : views::zip(in, rng::subrange(out, out_end)).zipped_segments()) { auto in_device = policy.get_devices()[in_seg.rank()]; - auto &&q = __detail::queue(dr::ranges::rank(in_seg)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(in_seg)); const std::size_t seg_size = rng::size(in_seg); assert(seg_size == rng::size(out_seg)); auto local_in_seg = __detail::local(in_seg); @@ -49,7 +49,7 @@ auto transform(ExecutionPolicy &&policy, dr::distributed_range auto &&in, })); } else { OutT *buffer = - sycl::malloc_device(seg_size, in_device, dr::shp::context()); + sycl::malloc_device(seg_size, in_device, experimental::dr::shp::context()); buffers.push_back(buffer); sycl::event compute_event = q.parallel_for( @@ -61,20 +61,20 @@ auto transform(ExecutionPolicy &&policy, dr::distributed_range auto &&in, __detail::wait(events); for (auto *b : buffers) - sycl::free(b, dr::shp::context()); + sycl::free(b, experimental::dr::shp::context()); return rng::unary_transform_result{ rng::end(in), out_end}; } -template +template auto transform(R &&in, Iter out, Fn &&fn) { - return transform(dr::shp::par_unseq, std::forward(in), + return transform(experimental::dr::shp::par_unseq, std::forward(in), std::forward(out), std::forward(fn)); } -template +template auto transform(ExecutionPolicy &&policy, Iter1 in_begin, Iter1 in_end, Iter2 out_end, Fn &&fn) { return transform( @@ -83,12 +83,12 @@ auto transform(ExecutionPolicy &&policy, Iter1 in_begin, Iter1 in_end, std::forward(out_end), std::forward(fn)); } -template auto transform(Iter1 in_begin, Iter1 in_end, Iter2 out_end, Fn &&fn) { - return transform(dr::shp::par_unseq, std::forward(in_begin), + return transform(experimental::dr::shp::par_unseq, std::forward(in_begin), std::forward(in_end), std::forward(out_end), std::forward(fn)); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp index eeff3b323a3..b54539b7955 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp @@ -8,9 +8,9 @@ #include -#include +#include -namespace dr::shp { +namespace experimental::dr::shp { template using shared_allocator = sycl::usm_allocator; @@ -124,4 +124,4 @@ template class buffered_allocator { std::shared_ptr> buffers_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp index e0cf3175241..88829221b69 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp @@ -6,7 +6,7 @@ #include -namespace dr::shp { +namespace experimental::dr::shp { namespace detail { @@ -25,4 +25,4 @@ inline std::tuple factor(std::size_t n) { } // namespace detail -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp deleted file mode 100644 index 41427430b55..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace dr::shp { - -template class distributed_dense_matrix_accessor { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_value_type = rng::range_value_t; - using scalar_reference = rng::range_reference_t; - - using value_type = dr::shp::matrix_entry; - - using reference = dr::shp::matrix_ref; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = distributed_dense_matrix_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - using tile_type = L; - - using key_type = dr::index<>; - - constexpr distributed_dense_matrix_accessor() noexcept = default; - constexpr ~distributed_dense_matrix_accessor() noexcept = default; - constexpr distributed_dense_matrix_accessor( - const distributed_dense_matrix_accessor &) noexcept = default; - constexpr distributed_dense_matrix_accessor & - operator=(const distributed_dense_matrix_accessor &) noexcept = default; - - constexpr distributed_dense_matrix_accessor( - std::span tiles, key_type grid_idx, key_type tile_idx, - key_type grid_shape, key_type tile_shape, key_type matrix_shape) noexcept - : grid_idx_(grid_idx), tile_idx_(tile_idx), grid_shape_(grid_shape), - tile_shape_(tile_shape), matrix_shape_(matrix_shape), tiles_(tiles) {} - - constexpr distributed_dense_matrix_accessor & - operator+=(difference_type offset) noexcept { - std::size_t new_global_idx_ = get_global_idx_() + offset; - key_type new_global_idx = {new_global_idx_ / matrix_shape_[1], - new_global_idx_ % matrix_shape_[1]}; - key_type new_grid_idx = {new_global_idx[0] / tile_shape_[0], - new_global_idx[1] / tile_shape_[1]}; - - key_type new_tile_idx = {new_global_idx[0] % tile_shape_[0], - new_global_idx[1] % tile_shape_[1]}; - - grid_idx_ = new_grid_idx; - tile_idx_ = new_tile_idx; - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return grid_idx_ == other.grid_idx_ && tile_idx_ == other.tile_idx_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(get_global_idx_()) - other.get_global_idx_(); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - if (get_grid_idx() < other.get_grid_idx()) { - return true; - } else if (get_grid_idx() == other.get_grid_idx()) { - return get_local_idx() < other.get_local_idx(); - } else { - return false; - } - } - - constexpr reference operator*() const noexcept { - auto &&tile = tiles_[get_grid_idx()]; - auto &&value = tile[get_local_idx()]; - key_type idx = {tile_idx_[0] + grid_idx_[0] * tile_shape_[0], - tile_idx_[1] + grid_idx_[1] * tile_shape_[1]}; - return reference(idx, value); - } - -private: - size_type get_global_idx_() const noexcept { - auto gidx = get_global_idx(); - return gidx[0] * matrix_shape_[1] + gidx[1]; - } - - key_type get_global_idx() const noexcept { - return {grid_idx_[0] * tile_shape_[0] + tile_idx_[0], - grid_idx_[1] * tile_shape_[1] + tile_idx_[1]}; - } - - size_type get_grid_idx() const noexcept { - return grid_idx_[0] * grid_shape_[1] + grid_idx_[1]; - } - - size_type get_local_idx() const noexcept { - return tile_idx_[0] * tile_shape_[1] + tile_idx_[1]; - } - - size_type get_tile_size() const noexcept { - return tile_shape_[0] * tile_shape_[1]; - } - -private: - key_type grid_idx_; - key_type tile_idx_; - - key_type grid_shape_; - key_type tile_shape_; - key_type matrix_shape_; - - std::span tiles_; -}; - -template -using distributed_dense_matrix_iterator = - dr::iterator_adaptor>; - -template class distributed_dense_matrix { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using value_type = dr::shp::matrix_entry; - - using scalar_reference = rng::range_reference_t< - dr::shp::device_vector>>; - using const_scalar_reference = rng::range_reference_t< - const dr::shp::device_vector>>; - - using reference = dr::shp::matrix_ref; - using const_reference = dr::shp::matrix_ref; - - using key_type = dr::index<>; - - using iterator = distributed_dense_matrix_iterator< - T, dr::shp::device_vector>>; - - distributed_dense_matrix(key_type shape) - : shape_(shape), partition_(new dr::shp::block_cyclic()) { - init_(); - } - - distributed_dense_matrix(key_type shape, const matrix_partition &partition) - : shape_(shape), partition_(partition.clone()) { - init_(); - } - - size_type size() const noexcept { return shape()[0] * shape()[1]; } - - key_type shape() const noexcept { return shape_; } - - scalar_reference operator[](key_type index) { - std::size_t tile_i = index[0] / tile_shape_[0]; - std::size_t tile_j = index[1] / tile_shape_[1]; - - std::size_t local_i = index[0] % tile_shape_[0]; - std::size_t local_j = index[1] % tile_shape_[1]; - - auto &&tile = tiles_[tile_i * grid_shape_[1] + tile_j]; - - return tile[local_i * tile_shape_[1] + local_j]; - } - - const_scalar_reference operator[](key_type index) const { - std::size_t tile_i = index[0] / tile_shape_[0]; - std::size_t tile_j = index[1] / tile_shape_[1]; - - std::size_t local_i = index[0] % tile_shape_[0]; - std::size_t local_j = index[1] % tile_shape_[1]; - - auto &&tile = tiles_[tile_i * grid_shape_[1] + tile_j]; - - return tile[local_i * tile_shape_[1] + local_j]; - } - - iterator begin() { - return iterator(tiles_, key_type({0, 0}), key_type({0, 0}), grid_shape_, - tile_shape_, shape_); - } - - iterator end() { return begin() + shape()[0] * shape()[1]; } - - key_type tile_shape() const noexcept { return tile_shape_; } - - key_type grid_shape() const noexcept { return grid_shape_; } - - auto tile(key_type tile_index) { - auto &&[i, j] = tile_index; - auto iter = tiles_[i * grid_shape()[1] + j].begin(); - - std::size_t tm = - std::min(tile_shape()[0], shape()[0] - i * tile_shape()[0]); - std::size_t tn = - std::min(tile_shape()[1], shape()[1] - j * tile_shape()[1]); - - return dense_matrix_view>>>( - iter, key_type{tm, tn}, tile_shape()[1], - tiles_[i * grid_shape()[1] + j].rank()); - } - - std::vector>>>> - tiles() { - std::vector>>>> - views_; - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - auto iter = tiles_[i * grid_shape_[1] + j].begin(); - - std::size_t tm = - std::min(tile_shape_[0], shape()[0] - i * tile_shape_[0]); - std::size_t tn = - std::min(tile_shape_[1], shape()[1] - j * tile_shape_[1]); - - views_.emplace_back(iter, key_type{tm, tn}, tile_shape_[1], - tiles_[i * grid_shape_[1] + j].rank()); - } - } - return views_; - } - - template > - auto get_tile(key_type tile_index, const Allocator &alloc = Allocator{}) { - std::size_t nrows = get_tile_shape_(tile_index)[0]; - std::size_t ld = tile_shape_[1]; - std::size_t tile_size = nrows * ld; - dense_matrix local_tile(get_tile_shape_(tile_index), ld, - alloc); - auto remote_tile = tile(tile_index); - shp::copy(remote_tile.data(), remote_tile.data() + tile_size, - local_tile.data()); - return local_tile; - } - - template > - auto get_tile_async(key_type tile_index, - const Allocator &alloc = Allocator{}) { - std::size_t nrows = get_tile_shape_(tile_index)[0]; - std::size_t ld = tile_shape_[1]; - std::size_t tile_size = nrows * ld; - dense_matrix local_tile(get_tile_shape_(tile_index), ld, - alloc); - auto remote_tile = tile(tile_index); - auto event = shp::copy_async( - remote_tile.data(), remote_tile.data() + tile_size, local_tile.data()); - return future(std::move(local_tile), {event}); - } - - auto segments() { - std::vector>>>> - views_; - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - auto iter = tiles_[i * grid_shape_[1] + j].begin(); - - std::size_t tm = - std::min(tile_shape_[0], shape()[0] - i * tile_shape_[0]); - std::size_t tn = - std::min(tile_shape_[1], shape()[1] - j * tile_shape_[1]); - - std::size_t m_offset = i * tile_shape_[0]; - std::size_t n_offset = j * tile_shape_[1]; - - views_.emplace_back(iter, key_type{tm, tn}, - key_type{m_offset, n_offset}, tile_shape_[1], - tiles_[i * grid_shape_[1] + j].rank()); - } - } - return dr::__detail::owning_view(std::move(views_)); - } - -private: - void init_() { - grid_shape_ = partition_->grid_shape(shape()); - tile_shape_ = partition_->tile_shape(shape()); - - tiles_.reserve(grid_shape_[0] * grid_shape_[1]); - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - std::size_t rank = partition_->tile_rank(shape(), {i, j}); - - auto device = dr::shp::devices()[rank]; - dr::shp::device_allocator alloc(dr::shp::context(), device); - - std::size_t tile_size = tile_shape_[0] * tile_shape_[1]; - - tiles_.emplace_back(tile_size, alloc, rank); - } - } - } - - key_type get_tile_shape_(key_type tile_index) { - auto &&[i, j] = tile_index; - std::size_t tm = std::min(tile_shape_[0], shape()[0] - i * tile_shape_[0]); - std::size_t tn = std::min(tile_shape_[1], shape()[1] - j * tile_shape_[1]); - return key_type{tm, tn}; - } - -private: - key_type shape_; - key_type grid_shape_; - key_type tile_shape_; - std::unique_ptr partition_; - - std::vector>> tiles_; -}; - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp index 0e4aa24d481..3cf785e0c76 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp @@ -4,15 +4,15 @@ #pragma once -#include -#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { -template > +template > class duplicated_vector { public: - using segment_type = dr::shp::device_vector; + using segment_type = experimental::dr::shp::device_vector; using value_type = T; using size_type = std::size_t; @@ -25,9 +25,9 @@ class duplicated_vector { capacity_ = count; std::size_t rank = 0; - for (auto &&device : dr::shp::devices()) { + for (auto &&device : experimental::dr::shp::devices()) { segments_.emplace_back( - segment_type(size(), Allocator(dr::shp::context(), device), rank++)); + segment_type(size(), Allocator(experimental::dr::shp::context(), device), rank++)); } } @@ -45,4 +45,4 @@ class duplicated_vector { std::size_t size_ = 0; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp deleted file mode 100644 index 0a460a1953f..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -#include - -namespace dr::shp { - -template class matrix_entry { -public: - using index_type = I; - using map_type = T; - - matrix_entry(dr::index index, const map_type &value) - : index_(index), value_(value) {} - matrix_entry(dr::index index, map_type &&value) - : index_(index), value_(std::move(value)) {} - - template - requires(std::is_constructible_v) - matrix_entry(dr::index index, U &&value) - : index_(index), value_(std::forward(value)) {} - - template - matrix_entry(Entry &&entry) - : index_(std::get<0>(entry)), value_(std::get<1>(entry)) {} - - template auto get() const noexcept { - if constexpr (Index == 0) { - return index(); - } - if constexpr (Index == 1) { - return value(); - } - } - - operator std::pair, T>() const noexcept { - return {{index_[0], index_[1]}, value_}; - } - - dr::index index() const noexcept { return index_; } - - map_type value() const noexcept { return value_; } - - template - requires(!std::is_same_v && - std::numeric_limits::max() >= std::numeric_limits::max()) - operator matrix_entry() const noexcept { - return matrix_entry(index_, value_); - } - - template - requires(!std::is_const_v && !std::is_same_v && - std::numeric_limits::max() >= std::numeric_limits::max()) - operator matrix_entry, U>() const noexcept { - return matrix_entry, U>(index_, value_); - } - - bool operator<(const matrix_entry &other) const noexcept { - if (index()[0] < other.index()[0]) { - return true; - } else if (index()[0] == other.index()[0] && - index()[1] < other.index()[1]) { - return true; - } - return false; - } - - matrix_entry() = default; - ~matrix_entry() = default; - - matrix_entry(const matrix_entry &) = default; - matrix_entry(matrix_entry &&) = default; - matrix_entry &operator=(const matrix_entry &) = default; - matrix_entry &operator=(matrix_entry &&) = default; - -private: - dr::index index_; - map_type value_; -}; - -} // namespace dr::shp - -namespace std { - -template - requires(!std::is_const_v) -void swap(dr::shp::matrix_entry a, dr::shp::matrix_entry b) { - dr::shp::matrix_entry other = a; - a = b; - b = other; -} - -template -struct tuple_element> - : tuple_element, T>> {}; - -template -struct tuple_size> : integral_constant { -}; - -} // namespace std - -namespace dr::shp { - -template -class matrix_ref { -public: - using scalar_type = T; - using index_type = I; - - using key_type = dr::index; - using map_type = T; - - using scalar_reference = TRef; - - using value_type = dr::shp::matrix_entry; - - matrix_ref(dr::index index, scalar_reference value) - : index_(index), value_(value) {} - - operator value_type() const noexcept { return value_type(index_, value_); } - - operator std::pair, T>() const noexcept { - return {{index_[0], index_[1]}, value_}; - } - - template - decltype(auto) get() const noexcept - requires(Index <= 1) - { - if constexpr (Index == 0) { - return index(); - } - if constexpr (Index == 1) { - return value(); - } - } - - dr::index index() const noexcept { return index_; } - - scalar_reference value() const noexcept { return value_; } - - template - requires(!std::is_same_v && - std::numeric_limits::max() >= std::numeric_limits::max()) - operator matrix_ref() const noexcept { - return matrix_ref(index_, value_); - } - - template - requires(!std::is_const_v && !std::is_same_v && - std::numeric_limits::max() >= std::numeric_limits::max()) - operator matrix_ref, U, TRef>() const noexcept { - return matrix_ref, U, TRef>(index_, value_); - } - - bool operator<(matrix_entry other) const noexcept { - if (index()[0] < other.index()[0]) { - return true; - } else if (index()[0] == other.index()[0] && - index()[1] < other.index()[1]) { - return true; - } - return false; - } - - matrix_ref() = delete; - ~matrix_ref() = default; - - matrix_ref(const matrix_ref &) = default; - matrix_ref &operator=(const matrix_ref &) = delete; - matrix_ref(matrix_ref &&) = default; - matrix_ref &operator=(matrix_ref &&) = default; - -private: - dr::index index_; - scalar_reference value_; -}; - -} // namespace dr::shp - -namespace std { - -template - requires(!std::is_const_v) -void swap(dr::shp::matrix_ref a, - dr::shp::matrix_ref b) { - dr::shp::matrix_entry other = a; - a = b; - b = other; -} - -template -struct tuple_element> - : tuple_element, TRef>> {}; - -template -struct tuple_size> - : integral_constant {}; - -template -inline decltype(auto) get(dr::shp::matrix_ref ref) - requires(Index <= 1) -{ - if constexpr (Index == 0) { - return ref.index(); - } - if constexpr (Index == 1) { - return ref.value(); - } -} - -template -inline decltype(auto) get(dr::shp::matrix_entry entry) - requires(Index <= 1) -{ - if constexpr (Index == 0) { - return entry.index(); - } - if constexpr (Index == 1) { - return entry.value(); - } -} - -} // namespace std diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp deleted file mode 100644 index f4cc45308fa..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp +++ /dev/null @@ -1,115 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -namespace dr::shp { - -namespace tile { - -// Special constant to indicate tile dimensions of -// {ceil(m / p_m), ceil(n / p_n)} should be chosen -// in order to evenly divide a dimension amongst the -// ranks in the processor grid. -inline constexpr std::size_t div = std::numeric_limits::max(); - -} // namespace tile - -class matrix_partition { -public: - virtual std::size_t tile_rank(dr::index<> matrix_shape, - dr::index<> tile_id) const = 0; - virtual dr::index<> grid_shape(dr::index<> matrix_shape) const = 0; - virtual dr::index<> tile_shape(dr::index<> matrix_shape) const = 0; - - virtual std::unique_ptr clone() const = 0; - virtual ~matrix_partition(){}; -}; - -class block_cyclic final : public matrix_partition { -public: - block_cyclic(dr::index<> tile_shape = {dr::shp::tile::div, - dr::shp::tile::div}, - dr::index<> grid_shape = detail::factor(dr::shp::nprocs())) - : tile_shape_(tile_shape), grid_shape_(grid_shape) {} - - block_cyclic(const block_cyclic &) noexcept = default; - - dr::index<> tile_shape() const { return tile_shape_; } - - std::size_t tile_rank(dr::index<> matrix_shape, dr::index<> tile_id) const { - dr::index<> pgrid_idx = {tile_id[0] % grid_shape_[0], - tile_id[1] % grid_shape_[1]}; - - auto pgrid = processor_grid_(); - - return pgrid[pgrid_idx[0] * grid_shape_[1] + pgrid_idx[1]]; - } - - dr::index<> grid_shape(dr::index<> matrix_shape) const { - auto ts = this->tile_shape(matrix_shape); - - return dr::index<>((matrix_shape[0] + ts[0] - 1) / ts[0], - (matrix_shape[1] + ts[1] - 1) / ts[1]); - } - - dr::index<> tile_shape(dr::index<> matrix_shape) const { - std::array tshape = {tile_shape_[0], tile_shape_[1]}; - - constexpr std::size_t ndims = 2; - for (std::size_t i = 0; i < ndims; i++) { - if (tshape[i] == dr::shp::tile::div) { - tshape[i] = (matrix_shape[i] + grid_shape_[i] - 1) / grid_shape_[i]; - } - } - - return tshape; - } - - std::unique_ptr clone() const noexcept { - return std::unique_ptr(new block_cyclic(*this)); - } - -private: - std::vector processor_grid_() const { - std::vector grid(grid_shape_[0] * grid_shape_[1]); - - for (std::size_t i = 0; i < grid.size(); i++) { - grid[i] = i; - } - return grid; - } - - dr::index<> tile_shape_; - dr::index<> grid_shape_; -}; // namespace dr::shp - -inline std::vector partition_matmul(std::size_t m, std::size_t n, - std::size_t k) { - dr::index<> c_pgrid = detail::factor(shp::nprocs()); - - block_cyclic c_block({dr::shp::tile::div, dr::shp::tile::div}, - {c_pgrid[0], c_pgrid[1]}); - - std::size_t k_block; - - if (m * k >= k * n) { - k_block = (shp::nprocs() + c_pgrid[0] - 1) / c_pgrid[0]; - } else { - k_block = (shp::nprocs() + c_pgrid[1] - 1) / c_pgrid[1]; - } - - block_cyclic a_block({dr::shp::tile::div, dr::shp::tile::div}, - {c_pgrid[0], k_block}); - block_cyclic b_block({dr::shp::tile::div, dr::shp::tile::div}, - {k_block, c_pgrid[1]}); - - return {a_block, b_block, c_block}; -} - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp deleted file mode 100644 index be164517eb4..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include - -namespace dr::shp { - -template > -class dense_matrix { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using allocator_type = Allocator; - - using scalar_pointer = typename std::allocator_traits::pointer; - - using scalar_reference = std::iter_reference_t; - using reference = dr::shp::matrix_ref; - - using key_type = dr::index<>; - using map_type = T; - - using iterator = dense_matrix_iterator; - - dense_matrix(key_type shape) - : allocator_(Allocator()), shape_(shape), ld_(shape[1]) { - data_ = allocator_.allocate(shape_[0] * shape_[1]); - } - - dense_matrix(key_type shape, std::size_t ld) - requires(std::is_default_constructible_v) - : allocator_(Allocator()), shape_(shape), ld_(ld) { - data_ = allocator_.allocate(shape_[0] * ld_); - } - - dense_matrix(key_type shape, std::size_t ld, const Allocator &alloc) - : allocator_(alloc), shape_(shape), ld_(ld) { - data_ = allocator_.allocate(shape_[0] * ld_); - } - - dense_matrix(dense_matrix &&other) - : allocator_(other.allocator_), data_(other.data_), shape_(other.shape_), - ld_(other.ld_) { - other.null_data_(); - } - - dense_matrix &operator=(dense_matrix &&other) { - deallocate_storage_(); - allocator_ = other.allocator_; - data_ = other.data_; - shape_ = other.shape_; - ld_ = other.ld_; - - other.null_data_(); - } - - dense_matrix(const dense_matrix &other) = delete; - dense_matrix &operator=(const dense_matrix &other) = delete; - - ~dense_matrix() { deallocate_storage_(); } - - key_type shape() const noexcept { return shape_; } - - size_type size() const noexcept { return shape()[0] * shape()[1]; } - - scalar_reference operator[](key_type idx) const { - return data_[idx[0] * ld_ + idx[1]]; - } - - iterator begin() const { - return iterator(data_, key_type{0, 0}, shape_, ld_); - } - - iterator end() const { - return iterator(data_, key_type{shape_[0], 0}, shape_, ld_); - } - - auto row(size_type row_index) const { - // return dense_matrix_row_view(data_ + row_index * ld_, row_index, - // shape()[1]); - auto row_elements = rng::views::iota(size_type(0), size_type(shape()[1])); - scalar_pointer data = data_ + row_index * ld_; - - return row_elements | rng::views::transform([=](auto column_index) { - return reference(key_type(row_index, column_index), - data[column_index]); - }); - } - - auto column(size_type column_index) const { - // return dense_matrix_column_view(data_ + column_index, column_index, - // shape()[0], ld_); - auto column_elements = - rng::views::iota(size_type(0), size_type(shape()[0])); - scalar_pointer data = data_ + column_index; - size_type ld = ld_; - - return column_elements | rng::views::transform([=](auto row_index) { - return reference(key_type(row_index, column_index), - data[row_index * ld]); - }); - } - - scalar_pointer data() const { return data_; } - - size_type ld() const { return ld_; } - - /* - auto local() const { - } - */ - -private: - void deallocate_storage_() { - if (data_ != nullptr) { - allocator_.deallocate(data_, shape_[0] * ld_); - } - } - - void null_data_() { - data_ = nullptr; - shape_ = {0, 0}; - ld_ = 0; - } - - allocator_type allocator_; - scalar_pointer data_; - key_type shape_; - size_type ld_; -}; - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp deleted file mode 100644 index 8f7a93e44d9..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp +++ /dev/null @@ -1,413 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace dr::shp { - -template - requires(rng::viewable_range) -class distributed_range_accessor { -public: - using segment_type = rng::range_value_t; - - using value_type = rng::range_value_t; - - using size_type = rng::range_size_t; - using difference_type = rng::range_difference_t; - - using reference = rng::range_reference_t; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = distributed_range_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - constexpr distributed_range_accessor() noexcept = default; - constexpr ~distributed_range_accessor() noexcept = default; - constexpr distributed_range_accessor( - const distributed_range_accessor &) noexcept = default; - constexpr distributed_range_accessor & - operator=(const distributed_range_accessor &) noexcept = default; - - constexpr distributed_range_accessor(Segments segments, size_type segment_id, - size_type idx) noexcept - : segments_(rng::views::all(std::forward(segments))), - segment_id_(segment_id), idx_(idx) {} - - constexpr distributed_range_accessor & - operator+=(difference_type offset) noexcept { - - while (offset > 0) { - difference_type current_offset = std::min( - offset, - difference_type(rng::size(*(segments_.begin() + segment_id_))) - - difference_type(idx_)); - idx_ += current_offset; - offset -= current_offset; - - if (idx_ >= rng::size((*(segments_.begin() + segment_id_)))) { - segment_id_++; - idx_ = 0; - } - } - - while (offset < 0) { - difference_type current_offset = - std::min(-offset, difference_type(idx_) + 1); - - difference_type new_idx = difference_type(idx_) - current_offset; - - if (new_idx < 0) { - segment_id_--; - new_idx = rng::size(*(segments_.begin() + segment_id_)) - 1; - } - - idx_ = new_idx; - } - - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return segment_id_ == other.segment_id_ && idx_ == other.idx_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(get_global_idx()) - other.get_global_idx(); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - if (segment_id_ < other.segment_id_) { - return true; - } else if (segment_id_ == other.segment_id_) { - return idx_ < other.idx_; - } else { - return false; - } - } - - constexpr reference operator*() const noexcept { - return *((*(segments_.begin() + segment_id_)).begin() + idx_); - } - -private: - size_type get_global_idx() const noexcept { - size_type cumulative_size = 0; - for (std::size_t i = 0; i < segment_id_; i++) { - cumulative_size += segments_[i].size(); - } - return cumulative_size + idx_; - } - - rng::views::all_t segments_; - size_type segment_id_ = 0; - size_type idx_ = 0; -}; - -template -using distributed_sparse_matrix_iterator = - dr::iterator_adaptor>; - -template class sparse_matrix { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using value_type = dr::shp::matrix_entry; - - using scalar_reference = rng::range_reference_t< - dr::shp::device_vector>>; - using const_scalar_reference = rng::range_reference_t< - const dr::shp::device_vector>>; - - using reference = dr::shp::matrix_ref; - using const_reference = dr::shp::matrix_ref; - - using key_type = dr::index; - - using segment_type = dr::shp::csr_matrix_view< - T, I, - rng::iterator_t>>, - rng::iterator_t>>>; - - // using iterator = sparse_matrix_iterator>>; - using iterator = - distributed_sparse_matrix_iterator &&>; - - sparse_matrix(key_type shape) - : shape_(shape), partition_(new dr::shp::block_cyclic()) { - init_(); - } - - sparse_matrix(key_type shape, double density) - : shape_(shape), partition_(new dr::shp::block_cyclic()) { - init_random_(density); - } - - sparse_matrix(key_type shape, double density, - const matrix_partition &partition) - : shape_(shape), partition_(partition.clone()) { - init_random_(density); - } - - sparse_matrix(key_type shape, const matrix_partition &partition) - : shape_(shape), partition_(partition.clone()) { - init_(); - } - - size_type size() const noexcept { return total_nnz_; } - - key_type shape() const noexcept { return shape_; } - - iterator begin() { return iterator(segments(), 0, 0); } - - iterator end() { - return iterator(segments(), grid_shape_[0] * grid_shape_[1], 0); - } - - segment_type tile(key_type tile_index) { - std::size_t tile_idx = tile_index[0] * grid_shape_[1] + tile_index[1]; - auto values = values_[tile_idx].begin(); - auto rowptr = rowptr_[tile_idx].begin(); - auto colind = colind_[tile_idx].begin(); - auto nnz = nnz_[tile_idx]; - - std::size_t tm = - std::min(tile_shape_[0], shape()[0] - tile_index[0] * tile_shape_[0]); - std::size_t tn = - std::min(tile_shape_[1], shape()[1] - tile_index[1] * tile_shape_[1]); - - return segment_type(values, rowptr, colind, key_type(tm, tn), nnz, - values_[tile_idx].rank()); - } - - // Note: this function is currently *not* asynchronous due to a deadlock - // in `gemv_benchmark`. I believe this is a SYCL bug. - template - auto copy_tile_async(key_type tile_index, - csr_matrix_view tile_view) { - std::size_t tile_idx = tile_index[0] * grid_shape_[1] + tile_index[1]; - auto &&values = values_[tile_idx]; - auto &&colind = colind_[tile_idx]; - auto &&rowptr = rowptr_[tile_idx]; - auto &&nnz = nnz_[tile_idx]; - - total_nnz_ -= nnz; - nnz = tile_view.size(); - - total_nnz_ += tile_view.size(); - - values.resize(tile_view.size()); - colind.resize(tile_view.size()); - rowptr.resize(tile_view.shape()[0] + 1); - - auto v_e = dr::shp::copy_async(tile_view.values_data(), - tile_view.values_data() + values.size(), - values.data()); - - auto c_e = dr::shp::copy_async(tile_view.colind_data(), - tile_view.colind_data() + colind.size(), - colind.data()); - - auto r_e = dr::shp::copy_async(tile_view.rowptr_data(), - tile_view.rowptr_data() + rowptr.size(), - rowptr.data()); - - tiles_ = generate_tiles_(); - segments_ = generate_segments_(); - - v_e.wait(); - c_e.wait(); - r_e.wait(); - - return __detail::combine_events({v_e, c_e, r_e}); - } - - template - void copy_tile(key_type tile_index, - csr_matrix_view tile_view) { - copy_tile_async(tile_index, tile_view).wait(); - } - - key_type tile_shape() const noexcept { return tile_shape_; } - - key_type grid_shape() const noexcept { return grid_shape_; } - - std::span tiles() { return std::span(tiles_); } - - std::span segments() { return std::span(segments_); } - -private: - std::vector generate_tiles_() { - std::vector views_; - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - std::size_t tm = std::min(tile_shape_[0], - shape()[0] - i * tile_shape_[0]); - std::size_t tn = std::min(tile_shape_[1], - shape()[1] - j * tile_shape_[1]); - - std::size_t tile_idx = i * grid_shape_[1] + j; - - auto values = values_[tile_idx].begin(); - auto rowptr = rowptr_[tile_idx].begin(); - auto colind = colind_[tile_idx].begin(); - auto nnz = nnz_[tile_idx]; - - views_.emplace_back(values, rowptr, colind, key_type(tm, tn), nnz, - values_[tile_idx].rank()); - } - } - return views_; - } - - std::vector generate_segments_() { - std::vector views_; - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - std::size_t tm = std::min(tile_shape_[0], - shape()[0] - i * tile_shape_[0]); - std::size_t tn = std::min(tile_shape_[1], - shape()[1] - j * tile_shape_[1]); - - std::size_t tile_idx = i * grid_shape_[1] + j; - - auto values = values_[tile_idx].begin(); - auto rowptr = rowptr_[tile_idx].begin(); - auto colind = colind_[tile_idx].begin(); - auto nnz = nnz_[tile_idx]; - - std::size_t m_offset = i * tile_shape_[0]; - std::size_t n_offset = j * tile_shape_[1]; - - views_.emplace_back(values, rowptr, colind, key_type(tm, tn), nnz, - values_[i * grid_shape_[1] + j].rank(), - key_type(m_offset, n_offset)); - } - } - return views_; - } - -private: - void init_() { - grid_shape_ = key_type(partition_->grid_shape(shape())); - tile_shape_ = key_type(partition_->tile_shape(shape())); - - values_.reserve(grid_shape_[0] * grid_shape_[1]); - rowptr_.reserve(grid_shape_[0] * grid_shape_[1]); - colind_.reserve(grid_shape_[0] * grid_shape_[1]); - nnz_.reserve(grid_shape_[0] * grid_shape_[1]); - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - std::size_t rank = partition_->tile_rank(shape(), {i, j}); - - auto device = dr::shp::devices()[rank]; - dr::shp::device_allocator alloc(dr::shp::context(), device); - dr::shp::device_allocator i_alloc(dr::shp::context(), device); - - values_.emplace_back(1, alloc, rank); - rowptr_.emplace_back(2, i_alloc, rank); - colind_.emplace_back(1, i_alloc, rank); - nnz_.push_back(0); - rowptr_.back()[0] = 0; - rowptr_.back()[1] = 0; - } - } - tiles_ = generate_tiles_(); - segments_ = generate_segments_(); - } - - void init_random_(double density) { - grid_shape_ = key_type(partition_->grid_shape(shape())); - tile_shape_ = key_type(partition_->tile_shape(shape())); - - values_.reserve(grid_shape_[0] * grid_shape_[1]); - rowptr_.reserve(grid_shape_[0] * grid_shape_[1]); - colind_.reserve(grid_shape_[0] * grid_shape_[1]); - nnz_.reserve(grid_shape_[0] * grid_shape_[1]); - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - std::size_t rank = partition_->tile_rank(shape(), {i, j}); - - std::size_t tm = std::min(tile_shape_[0], - shape()[0] - i * tile_shape_[0]); - std::size_t tn = std::min(tile_shape_[1], - shape()[1] - j * tile_shape_[1]); - - auto device = dr::shp::devices()[rank]; - dr::shp::device_allocator alloc(dr::shp::context(), device); - dr::shp::device_allocator i_alloc(dr::shp::context(), device); - - auto seed = i * grid_shape_[1] + j; - - auto csr = generate_random_csr(key_type(tm, tn), density, seed); - std::size_t nnz = csr.size(); - - dr::shp::device_vector> values( - csr.size(), alloc, rank); - dr::shp::device_vector> rowptr( - tm + 1, i_alloc, rank); - - dr::shp::device_vector> colind( - csr.size(), i_alloc, rank); - - dr::shp::copy(csr.values_data(), csr.values_data() + csr.size(), - values.data()); - dr::shp::copy(csr.rowptr_data(), csr.rowptr_data() + tm + 1, - rowptr.data()); - dr::shp::copy(csr.colind_data(), csr.colind_data() + csr.size(), - colind.data()); - - values_.push_back(std::move(values)); - rowptr_.emplace_back(std::move(rowptr)); - colind_.emplace_back(std::move(colind)); - nnz_.push_back(nnz); - total_nnz_ += nnz; - - delete[] csr.values_data(); - delete[] csr.rowptr_data(); - delete[] csr.colind_data(); - } - } - tiles_ = generate_tiles_(); - segments_ = generate_segments_(); - } - -private: - key_type shape_; - key_type grid_shape_; - key_type tile_shape_; - std::unique_ptr partition_; - - std::vector>> values_; - std::vector>> rowptr_; - std::vector>> colind_; - - std::vector nnz_; - std::size_t total_nnz_ = 0; - - std::vector tiles_; - std::vector segments_; -}; - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp index 523be315078..7ba49cb2546 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp @@ -4,18 +4,18 @@ #pragma once -#include -#include -#include -#include #include +#include +#include +#include +#include #include -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { -inline constexpr auto local = dr::ranges::__detail::local; +inline constexpr auto local = experimental::dr::ranges::__detail::local; template concept is_syclmemcopyable = std::is_same_v, Dest> && @@ -87,4 +87,4 @@ inline void wait(const std::vector &events) { } // namespace __detail -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp index 21e72160be2..a9d9a7a1dd0 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp @@ -8,9 +8,9 @@ #include -#include +#include -namespace dr::shp { +namespace experimental::dr::shp { template requires(std::is_trivially_copyable_v || std::is_void_v) @@ -141,4 +141,4 @@ class device_ptr { T *pointer_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp index bf2042fd4b0..5cbf13f9544 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp @@ -4,11 +4,11 @@ #pragma once -#include +#include #include #include -namespace dr::shp { +namespace experimental::dr::shp { template requires(std::is_trivially_copyable_v || std::is_void_v) @@ -24,7 +24,7 @@ class device_ref { #ifdef __SYCL_DEVICE_ONLY__ return *pointer_; #else - auto &&q = dr::shp::__detail::default_queue(); + auto &&q = experimental::dr::shp::__detail::default_queue(); char buffer[sizeof(T)] __attribute__((aligned(sizeof(T)))); q.memcpy(reinterpret_cast(buffer), pointer_, sizeof(T)).wait(); return *reinterpret_cast(buffer); @@ -37,7 +37,7 @@ class device_ref { #ifdef __SYCL_DEVICE_ONLY__ *pointer_ = value; #else - auto &&q = dr::shp::__detail::default_queue(); + auto &&q = experimental::dr::shp::__detail::default_queue(); q.memcpy(pointer_, &value, sizeof(T)).wait(); #endif return *this; @@ -57,4 +57,4 @@ class device_ref { T *pointer_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp index 5f3a2282314..89147476bee 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp @@ -4,11 +4,11 @@ #pragma once -#include -#include +#include +#include #include -namespace dr::shp { +namespace experimental::dr::shp { // A `device_span` is simply a normal `std::span` that's // been decorated with an extra `rank()` function, showing @@ -41,7 +41,7 @@ class device_span : public std::span { */ template -class device_span : public dr::shp::span { +class device_span : public experimental::dr::shp::span { public: constexpr device_span() noexcept {} @@ -51,22 +51,22 @@ class device_span : public dr::shp::span { using reference = std::iter_reference_t; template - requires(dr::remote_range) + requires(experimental::dr::remote_range) device_span(R &&r) - : dr::shp::span(rng::begin(r), rng::size(r)), - rank_(dr::ranges::rank(r)) {} + : experimental::dr::shp::span(rng::begin(r), rng::size(r)), + rank_(experimental::dr::ranges::rank(r)) {} template device_span(R &&r, std::size_t rank) - : dr::shp::span(rng::begin(r), rng::size(r)), rank_(rank) {} + : experimental::dr::shp::span(rng::begin(r), rng::size(r)), rank_(rank) {} template constexpr device_span(It first, std::size_t count, std::size_t rank) - : dr::shp::span(first, count), rank_(rank) {} + : experimental::dr::shp::span(first, count), rank_(rank) {} template constexpr device_span(It first, End last, std::size_t rank) - : dr::shp::span(first, last), rank_(rank) {} + : experimental::dr::shp::span(first, last), rank_(rank) {} constexpr std::size_t rank() const noexcept { return rank_; } @@ -94,4 +94,4 @@ template device_span(R &&, std::size_t) -> device_span, rng::iterator_t>; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp index a5b5144b5a1..b9f1ea4ff42 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp @@ -4,17 +4,17 @@ #pragma once -#include -#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { template -class device_vector : public dr::shp::vector { +class device_vector : public experimental::dr::shp::vector { public: constexpr device_vector() noexcept {} - using base = dr::shp::vector; + using base = experimental::dr::shp::vector; using value_type = T; using size_type = std::size_t; @@ -34,4 +34,4 @@ template device_vector(std::size_t, const Alloc, std::size_t) -> device_vector; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp index b1423082253..59992298f6f 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp @@ -6,13 +6,13 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { template class distributed_span_accessor { public: @@ -102,7 +102,7 @@ template class distributed_span_accessor { } auto segments() const noexcept { - return dr::__detail::drop_segments(segments_, segment_id_, idx_); + return experimental::dr::__detail::drop_segments(segments_, segment_id_, idx_); } private: @@ -121,7 +121,7 @@ template class distributed_span_accessor { template using distributed_span_iterator = - dr::iterator_adaptor>; + experimental::dr::iterator_adaptor>; template class distributed_span : public rng::view_interface> { @@ -129,7 +129,7 @@ class distributed_span : public rng::view_interface> { using element_type = T; using value_type = std::remove_cv_t; - using segment_type = dr::shp::device_span; + using segment_type = experimental::dr::shp::device_span; using size_type = rng::range_size_t; using difference_type = rng::range_difference_t; @@ -154,21 +154,21 @@ class distributed_span : public rng::view_interface> { operator=(const distributed_span &) noexcept = default; template - requires(dr::remote_range>) + requires(experimental::dr::remote_range>) constexpr distributed_span(R &&segments) { for (auto &&segment : segments) { std::size_t size = rng::size(segment); segments_.push_back( - segment_type(rng::begin(segment), size, dr::ranges::rank(segment))); + segment_type(rng::begin(segment), size, experimental::dr::ranges::rank(segment))); size_ += size; } } - template constexpr distributed_span(R &&r) { - for (auto &&segment : dr::ranges::segments(std::forward(r))) { + template constexpr distributed_span(R &&r) { + for (auto &&segment : experimental::dr::ranges::segments(std::forward(r))) { std::size_t size = rng::size(segment); segments_.push_back( - segment_type(rng::begin(segment), size, dr::ranges::rank(segment))); + segment_type(rng::begin(segment), size, experimental::dr::ranges::rank(segment))); size_ += size; } } @@ -249,9 +249,9 @@ distributed_span(R &&segments) -> distributed_span>, rng::iterator_t>>; -template +template distributed_span(R &&r) -> distributed_span< rng::range_value_t, - rng::iterator_t>>; + rng::iterator_t>>; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp index 823862c21ba..1175f7ce6d5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp @@ -8,13 +8,13 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { template class distributed_vector_accessor { public: @@ -92,7 +92,7 @@ template class distributed_vector_accessor { } auto segments() const noexcept { - return dr::__detail::drop_segments(segments_, segment_id_, idx_); + return experimental::dr::__detail::drop_segments(segments_, segment_id_, idx_); } private: @@ -108,17 +108,17 @@ template class distributed_vector_accessor { template using distributed_vector_iterator = - dr::iterator_adaptor>; + experimental::dr::iterator_adaptor>; // TODO: support teams, distributions /// distributed vector -template > +template > struct distributed_vector { public: - using segment_type = dr::shp::device_vector; + using segment_type = experimental::dr::shp::device_vector; using const_segment_type = - std::add_const_t>; + std::add_const_t>; using value_type = T; using size_type = std::size_t; @@ -137,27 +137,27 @@ struct distributed_vector { using allocator_type = Allocator; distributed_vector(std::size_t count = 0) { - assert(dr::shp::devices().size() > 0); + assert(experimental::dr::shp::devices().size() > 0); size_ = count; segment_size_ = - (count + dr::shp::devices().size() - 1) / dr::shp::devices().size(); - capacity_ = segment_size_ * dr::shp::devices().size(); + (count + experimental::dr::shp::devices().size() - 1) / experimental::dr::shp::devices().size(); + capacity_ = segment_size_ * experimental::dr::shp::devices().size(); std::size_t rank = 0; - for (auto &&device : dr::shp::devices()) { + for (auto &&device : experimental::dr::shp::devices()) { segments_.emplace_back(segment_type( - segment_size_, Allocator(dr::shp::context(), device), rank++)); + segment_size_, Allocator(experimental::dr::shp::context(), device), rank++)); } } distributed_vector(std::size_t count, const T &value) : distributed_vector(count) { - dr::shp::fill(*this, value); + experimental::dr::shp::fill(*this, value); } distributed_vector(std::initializer_list init) : distributed_vector(init.size()) { - dr::shp::copy(rng::begin(init), rng::end(init), begin()); + experimental::dr::shp::copy(rng::begin(init), rng::end(init), begin()); } reference operator[](size_type pos) { @@ -174,10 +174,10 @@ struct distributed_vector { size_type size() const noexcept { return size_; } - auto segments() { return dr::__detail::take_segments(segments_, size()); } + auto segments() { return experimental::dr::__detail::take_segments(segments_, size()); } auto segments() const { - return dr::__detail::take_segments(segments_, size()); + return experimental::dr::__detail::take_segments(segments_, size()); } iterator begin() { return iterator(segments_, 0, 0, segment_size_); } @@ -201,7 +201,7 @@ struct distributed_vector { void resize(size_type count, const value_type &value) { distributed_vector other(count, value); std::size_t copy_size = std::min(other.size(), size()); - dr::shp::copy(begin(), begin() + copy_size, other.begin()); + experimental::dr::shp::copy(begin(), begin() + copy_size, other.begin()); *this = std::move(other); } @@ -214,4 +214,4 @@ struct distributed_vector { std::size_t segment_size_ = 0; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp index 1433e40b9c8..7678e479f07 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp @@ -7,9 +7,9 @@ #include #include -#include +#include -namespace dr::shp { +namespace experimental::dr::shp { template class future { public: @@ -45,4 +45,4 @@ template class future { std::vector events_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp index e6f99238b08..667d21f6ef0 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp @@ -11,11 +11,11 @@ #include #include -#include -#include #include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { @@ -103,4 +103,4 @@ inline auto &dpl_policy(std::size_t rank) { } // namespace __detail -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp index 7c6d7e29ac3..f12180c6db1 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp @@ -4,10 +4,10 @@ #pragma once -#include -#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { template class id { public: @@ -92,7 +92,7 @@ class segment_range_accessor { size_type idx_ = 0; }; -using segment_range_iterator = dr::iterator_adaptor; +using segment_range_iterator = experimental::dr::iterator_adaptor; template class segment_range { public: @@ -129,24 +129,5 @@ template class segment_range { std::size_t global_offset_; }; -/* -template auto distributed_iota_view(R &&r) { - static_assert(dr::distributed_contiguous_range); - if constexpr (dr::distributed_contiguous_range) { - std::vector> iota_segments; - std::size_t global_offset = 0; - std::size_t segment_id = 0; - for (auto &&segment : r.segments()) { - iota_segments.push_back( - segment_range(segment_id, segment.size(), global_offset)); - global_offset += segment.size(); - segment_id++; - } - return dr::shp::distributed_span(iota_segments); - } else { - return segment_range(0, rng::size(r), 0); - } -} -*/ -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp index 4b9a4b7fd22..19864776395 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp @@ -4,14 +4,14 @@ #pragma once -#include -#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { template auto enumerate(R &&r) { auto i = rng::views::iota(uint32_t(0), uint32_t(rng::size(r))); - return dr::shp::zip_view(i, r); + return experimental::dr::shp::zip_view(i, r); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp index 678c9f0a8d5..4bc2c0ed4d3 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp @@ -6,12 +6,12 @@ #include -#include +#include -namespace dr::shp { +namespace experimental::dr::shp { template -class span : public rng::view_interface> { +class span : public rng::view_interface> { public: static_assert(std::is_same_v, T>); @@ -58,4 +58,4 @@ span(R &&) -> span, rng::iterator_t>; template span(Iter first, std::size_t count) -> span, Iter>; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp index 479d3bf5207..6750c0a1d79 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template sycl::device select_device(Selector &&selector) { sycl::device d; @@ -177,44 +177,45 @@ template void print_range(Range &&r, std::string label = "") { std::cout << "]" << std::endl; } -template -void print_matrix(Matrix &&m, std::string label = "") { - std::cout << m.shape()[0] << " x " << m.shape()[1] << " matrix with " - << m.size() << " stored values"; - if (label != "") { - std::cout << " \"" << label << "\""; - } - std::cout << std::endl; +// template +// void print_matrix(Matrix &&m, std::string label = "") { +// std::cout << m.shape()[0] << " x " << m.shape()[1] << " matrix with " +// << m.size() << " stored values"; +// if (label != "") { +// std::cout << " \"" << label << "\""; +// } +// std::cout << std::endl; - for (auto &&tuple : m) { - auto &&[index, value] = tuple; - auto &&[i, j] = index; +// for (auto &&tuple : m) { +// auto &&[index, value] = tuple; +// auto &&[i, j] = index; - std::cout << "(" << i << ", " << j << "): " << value << std::endl; - } -} +// std::cout << "(" << i << ", " << j << "): " << value << std::endl; +// } +// } template void print_range_details(R &&r, std::string label = "") { if (label != "") { std::cout << "\"" << label << "\" "; } - std::cout << "distributed range with " << rng::size(dr::ranges::segments(r)) - << " segments." << std::endl; + std::cout << "distributed range with " + << rng::size(experimental::dr::ranges::segments(r)) << " segments." + << std::endl; std::size_t idx = 0; - for (auto &&segment : dr::ranges::segments(r)) { + for (auto &&segment : experimental::dr::ranges::segments(r)) { std::cout << "Seg " << idx++ << ", size " << segment.size() << " (rank " - << dr::ranges::rank(segment) << ")" << std::endl; + << experimental::dr::ranges::rank(segment) << ")" << std::endl; } } -template +template void range_details(R &&r, std::size_t width = 80) { std::size_t size = rng::size(r); - for (auto &&[idx, segment] : - dr::__detail::enumerate(dr::ranges::segments(r))) { + for (auto &&[idx, segment] : experimental::dr::__detail::enumerate( + experimental::dr::ranges::segments(r))) { std::size_t local_size = rng::size(segment); double percent = double(local_size) / size; @@ -228,8 +229,8 @@ void range_details(R &&r, std::size_t width = 80) { std::size_t after_whitespace = whitespace - initial_whitespace; std::cout << "[" << std::string(initial_whitespace, ' ') - << dr::ranges::rank(segment) << std::string(after_whitespace, ' ') - << "]"; + << experimental::dr::ranges::rank(segment) + << std::string(after_whitespace, ' ') << "]"; } std::cout << std::endl; } @@ -243,4 +244,4 @@ concept sycl_device_selector = requires(T &t, const sycl::device &device) { } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp deleted file mode 100644 index 2e801ce8d85..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp +++ /dev/null @@ -1,170 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -namespace dr::shp { - -namespace __detail { - -template > -class coo_matrix { -public: - using value_type = dr::shp::matrix_entry; - using scalar_type = T; - using index_type = I; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using allocator_type = Allocator; - - using key_type = dr::index; - using map_type = T; - - using backend_allocator_type = typename std::allocator_traits< - allocator_type>::template rebind_alloc; - using backend_type = std::vector; - - using iterator = typename backend_type::iterator; - using const_iterator = typename backend_type::const_iterator; - - using reference = dr::shp::matrix_ref; - using const_reference = dr::shp::matrix_ref, I>; - - using scalar_reference = T &; - - coo_matrix(dr::index shape) : shape_(shape) {} - - dr::index shape() const noexcept { return shape_; } - - size_type size() const noexcept { return tuples_.size(); } - - void reserve(size_type new_cap) { tuples_.reserve(new_cap); } - - iterator begin() noexcept { return tuples_.begin(); } - - const_iterator begin() const noexcept { return tuples_.begin(); } - - iterator end() noexcept { return tuples_.end(); } - - const_iterator end() const noexcept { return tuples_.end(); } - - template void insert(InputIt first, InputIt last) { - for (auto iter = first; iter != last; ++iter) { - insert(*iter); - } - } - - template void push_back(InputIt first, InputIt last) { - for (auto iter = first; iter != last; ++iter) { - push_back(*iter); - } - } - - void push_back(const value_type &value) { tuples_.push_back(value); } - - template void assign_tuples(InputIt first, InputIt last) { - tuples_.assign(first, last); - } - - std::pair insert(value_type &&value) { - auto &&[insert_index, insert_value] = value; - for (auto iter = begin(); iter != end(); ++iter) { - auto &&[index, v] = *iter; - if (index == insert_index) { - return {iter, false}; - } - } - tuples_.push_back(value); - return {--tuples_.end(), true}; - } - - std::pair insert(const value_type &value) { - auto &&[insert_index, insert_value] = value; - for (auto iter = begin(); iter != end(); ++iter) { - auto &&[index, v] = *iter; - if (index == insert_index) { - return {iter, false}; - } - } - tuples_.push_back(value); - return {--tuples_.end(), true}; - } - - template - std::pair insert_or_assign(key_type k, M &&obj) { - for (auto iter = begin(); iter != end(); ++iter) { - auto &&[index, v] = *iter; - if (index == k) { - v = std::forward(obj); - return {iter, false}; - } - } - tuples_.push_back({k, std::forward(obj)}); - return {--tuples_.end(), true}; - } - - iterator find(key_type key) noexcept { - return std::find_if(begin(), end(), [&](auto &&v) { - auto &&[i, v_] = v; - return i == key; - }); - } - - const_iterator find(key_type key) const noexcept { - return std::find_if(begin(), end(), [&](auto &&v) { - auto &&[i, v_] = v; - return i == key; - }); - } - - void reshape(dr::index shape) { - bool all_inside = true; - for (auto &&[index, v] : *this) { - auto &&[i, j] = index; - if (!(i < shape[0] && j < shape[1])) { - all_inside = false; - break; - } - } - - if (all_inside) { - shape_ = shape; - return; - } else { - coo_matrix new_tuples(shape); - for (auto &&[index, v] : *this) { - auto &&[i, j] = index; - if (i < shape[0] && j < shape[1]) { - new_tuples.insert({index, v}); - } - } - shape_ = shape; - assign_tuples(new_tuples.begin(), new_tuples.end()); - } - } - - coo_matrix() = default; - ~coo_matrix() = default; - coo_matrix(const coo_matrix &) = default; - coo_matrix(coo_matrix &&) = default; - coo_matrix &operator=(const coo_matrix &) = default; - coo_matrix &operator=(coo_matrix &&) = default; - - std::size_t nbytes() const noexcept { - return tuples_.size() * sizeof(value_type); - } - -private: - dr::index shape_; - backend_type tuples_; -}; - -} // namespace __detail - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp deleted file mode 100644 index f72e1fc3cb4..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp +++ /dev/null @@ -1,92 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include -#include - -namespace dr::shp { - -namespace { - -template struct uniform_distribution { - using type = std::uniform_int_distribution; -}; - -template struct uniform_distribution { - using type = std::uniform_real_distribution; -}; - -template -using uniform_distribution_t = typename uniform_distribution::type; - -} // namespace - -template -auto generate_random_csr(dr::index shape, double density = 0.01, - unsigned int seed = 0) { - - assert(density >= 0.0 && density < 1.0); - - std::map, T> tuples; - - std::size_t nnz = density * shape[0] * shape[1]; - - std::mt19937 gen(seed); - std::uniform_int_distribution row(0, shape[0] - 1); - std::uniform_int_distribution column(0, shape[1] - 1); - - uniform_distribution_t value_gen(0, 1); - - while (tuples.size() < nnz) { - auto i = row(gen); - auto j = column(gen); - if (tuples.find({i, j}) == tuples.end()) { - T value = value_gen(gen); - tuples.insert({{i, j}, value}); - } - } - - T *values = new T[nnz]; - I *rowptr = new I[shape[0] + 1]; - I *colind = new I[nnz]; - - rowptr[0] = 0; - - std::size_t r = 0; - std::size_t c = 0; - for (auto iter = tuples.begin(); iter != tuples.end(); ++iter) { - auto &&[index, value] = *iter; - auto &&[i, j] = index; - - values[c] = value; - colind[c] = j; - - while (r < i) { - if (r + 1 > shape[0]) { - // TODO: exception? - // throw std::runtime_error("csr_matrix_impl_: given invalid matrix"); - } - rowptr[r + 1] = c; - r++; - } - c++; - - if (c > nnz) { - // TODO: exception? - // throw std::runtime_error("csr_matrix_impl_: given invalid matrix"); - } - } - - for (; r < shape[0]; r++) { - rowptr[r + 1] = nnz; - } - - return csr_matrix_view(values, rowptr, colind, shape, nnz, 0); -} - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp deleted file mode 100644 index 77ee2359ae1..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace dr::shp { - -namespace __detail { - -// Preconditions: -// 1) `tuples` sorted by row, column -// 2) `tuples` has shape `shape` -// 3) `tuples` has `nnz` elements -template -auto convert_to_csr(Tuples &&tuples, dr::index<> shape, std::size_t nnz, - Allocator &&allocator) { - auto &&[index, v] = *tuples.begin(); - auto &&[i, j] = index; - - using T = std::remove_reference_t; - using I = std::remove_reference_t; - - typename std::allocator_traits::template rebind_alloc - i_allocator(allocator); - - T *values = allocator.allocate(nnz); - I *rowptr = i_allocator.allocate(shape[0] + 1); - I *colind = i_allocator.allocate(nnz); - - rowptr[0] = 0; - - std::size_t r = 0; - std::size_t c = 0; - for (auto iter = tuples.begin(); iter != tuples.end(); ++iter) { - auto &&[index, value] = *iter; - auto &&[i, j] = index; - - values[c] = value; - colind[c] = j; - - while (r < i) { - assert(r + 1 <= shape[0]); - // throw std::runtime_error("csr_matrix_impl_: given invalid matrix"); - rowptr[r + 1] = c; - r++; - } - c++; - - assert(c <= nnz); - // throw std::runtime_error("csr_matrix_impl_: given invalid matrix"); - } - - for (; r < shape[0]; r++) { - rowptr[r + 1] = nnz; - } - - return csr_matrix_view(values, rowptr, colind, - dr::index(shape[0], shape[1]), nnz, 0); -} - -/// Read in the Matrix Market file at location `file_path` and a return -/// a coo_matrix data structure with its contents. -template -inline coo_matrix mmread(std::string file_path, bool one_indexed = true) { - using size_type = std::size_t; - - std::ifstream f; - - f.open(file_path.c_str()); - - if (!f.is_open()) { - // TODO better choice of exception. - throw std::runtime_error("mmread: cannot open " + file_path); - } - - std::string buf; - - // Make sure the file is matrix market matrix, coordinate, and check whether - // it is symmetric. If the matrix is symmetric, non-diagonal elements will - // be inserted in both (i, j) and (j, i). Error out if skew-symmetric or - // Hermitian. - std::getline(f, buf); - std::istringstream ss(buf); - std::string item; - ss >> item; - if (item != "%%MatrixMarket") { - throw std::runtime_error(file_path + - " could not be parsed as a Matrix Market file."); - } - ss >> item; - if (item != "matrix") { - throw std::runtime_error(file_path + - " could not be parsed as a Matrix Market file."); - } - ss >> item; - if (item != "coordinate") { - throw std::runtime_error(file_path + - " could not be parsed as a Matrix Market file."); - } - bool pattern; - ss >> item; - if (item == "pattern") { - pattern = true; - } else { - pattern = false; - } - // TODO: do something with real vs. integer vs. pattern? - ss >> item; - bool symmetric; - if (item == "general") { - symmetric = false; - } else if (item == "symmetric") { - symmetric = true; - } else { - throw std::runtime_error(file_path + " has an unsupported matrix type"); - } - - bool outOfComments = false; - while (!outOfComments) { - std::getline(f, buf); - - if (buf[0] != '%') { - outOfComments = true; - } - } - - I m, n, nnz; - // std::istringstream ss(buf); - ss.clear(); - ss.str(buf); - ss >> m >> n >> nnz; - - // NOTE for symmetric matrices: `nnz` holds the number of stored values in - // the matrix market file, while `matrix.nnz_` will hold the total number of - // stored values (including "mirrored" symmetric values). - coo_matrix matrix({m, n}); - if (symmetric) { - matrix.reserve(2 * nnz); - } else { - matrix.reserve(nnz); - } - - size_type c = 0; - while (std::getline(f, buf)) { - I i, j; - T v; - std::istringstream ss(buf); - if (!pattern) { - ss >> i >> j >> v; - } else { - ss >> i >> j; - v = T(1); - } - if (one_indexed) { - i--; - j--; - } - - if (i >= m || j >= n) { - throw std::runtime_error( - "read_MatrixMarket: file has nonzero out of bounds."); - } - - matrix.push_back({{i, j}, v}); - - if (symmetric && i != j) { - matrix.push_back({{j, i}, v}); - } - - c++; - if (c > nnz) { - throw std::runtime_error("read_MatrixMarket: error reading Matrix Market " - "file, file has more nonzeros than reported."); - } - } - - auto sort_fn = [](const auto &a, const auto &b) { - auto &&[a_index, a_value] = a; - auto &&[b_index, b_value] = b; - auto &&[a_i, a_j] = a_index; - auto &&[b_i, b_j] = b_index; - if (a_i < b_i) { - return true; - } else if (a_i == b_i) { - if (a_j < b_j) { - return true; - } - } - return false; - }; - - std::sort(matrix.begin(), matrix.end(), sort_fn); - - f.close(); - - return matrix; -} - -template -void destroy_csr_matrix_view(dr::shp::csr_matrix_view view, - Allocator &&alloc) { - alloc.deallocate(view.values_data(), view.size()); - typename std::allocator_traits::template rebind_alloc i_alloc( - alloc); - i_alloc.deallocate(view.colind_data(), view.size()); - i_alloc.deallocate(view.rowptr_data(), view.shape()[0] + 1); -} - -} // namespace __detail - -template -auto create_distributed(dr::shp::csr_matrix_view local_mat, - const matrix_partition &partition) { - dr::shp::sparse_matrix a(local_mat.shape(), partition); - - std::vector> views; - std::vector events; - views.reserve(a.grid_shape()[0] * a.grid_shape()[1]); - - for (I i = 0; i < a.grid_shape()[0]; i++) { - for (I j = 0; j < a.grid_shape()[1]; j++) { - auto &&tile = a.tile({i, j}); - dr::index row_bounds(i * a.tile_shape()[0], - i * a.tile_shape()[0] + tile.shape()[0]); - dr::index column_bounds(j * a.tile_shape()[1], - j * a.tile_shape()[1] + tile.shape()[1]); - - auto local_submat = local_mat.submatrix(row_bounds, column_bounds); - - auto submatrix_shape = dr::index(row_bounds[1] - row_bounds[0], - column_bounds[1] - column_bounds[0]); - - auto copied_submat = __detail::convert_to_csr( - local_submat, submatrix_shape, rng::distance(local_submat), - std::allocator{}); - - auto e = a.copy_tile_async({i, j}, copied_submat); - - views.push_back(copied_submat); - events.push_back(e); - } - } - __detail::wait(events); - - for (auto &&view : views) { - __detail::destroy_csr_matrix_view(view, std::allocator{}); - } - - return a; -} - -template -auto mmread(std::string file_path, const matrix_partition &partition, - bool one_indexed = true) { - auto m = __detail::mmread(file_path, one_indexed); - auto shape = m.shape(); - auto nnz = m.size(); - - auto local_mat = __detail::convert_to_csr(m, shape, nnz, std::allocator{}); - - auto a = create_distributed(local_mat, partition); - - __detail::destroy_csr_matrix_view(local_mat, std::allocator{}); - - return a; -} - -template -auto mmread(std::string file_path, bool one_indexed = true) { - return mmread( - file_path, - dr::shp::block_cyclic({dr::shp::tile::div, dr::shp::tile::div}, - {dr::shp::nprocs(), 1}), - one_indexed); -} - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp index 7860e1f22ed..3a4b35cb7ae 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp @@ -6,7 +6,7 @@ #include -namespace dr::shp { +namespace experimental::dr::shp { // TODO: deal properly with non-trivially destructible types // - constructors, destructors, assign @@ -245,4 +245,4 @@ template > class vector { allocator_type allocator_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp deleted file mode 100644 index 0be6941398c..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp +++ /dev/null @@ -1,225 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -namespace dr::shp { - -template -class csr_matrix_view_accessor { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_type = std::iter_value_t; - using scalar_reference = std::iter_reference_t; - - using index_type = I; - - using value_type = dr::shp::matrix_entry; - - using reference = dr::shp::matrix_ref; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = csr_matrix_view_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - using key_type = dr::index; - - constexpr csr_matrix_view_accessor() noexcept = default; - constexpr ~csr_matrix_view_accessor() noexcept = default; - constexpr csr_matrix_view_accessor( - const csr_matrix_view_accessor &) noexcept = default; - constexpr csr_matrix_view_accessor & - operator=(const csr_matrix_view_accessor &) noexcept = default; - - constexpr csr_matrix_view_accessor(TIter values, IIter rowptr, IIter colind, - size_type idx, index_type row, - size_type row_dim) noexcept - : values_(values), rowptr_(rowptr), colind_(colind), idx_(idx), row_(row), - row_dim_(row_dim), idx_offset_(key_type{0, 0}) { - fast_forward_row(); - } - - constexpr csr_matrix_view_accessor(TIter values, IIter rowptr, IIter colind, - size_type idx, index_type row, - size_type row_dim, - key_type idx_offset) noexcept - : values_(values), rowptr_(rowptr), colind_(colind), idx_(idx), row_(row), - row_dim_(row_dim), idx_offset_(idx_offset) { - fast_forward_row(); - } - - // Given that `idx_` has just been advanced to an element - // possibly in a new row, advance `row_` to find the new row. - // That is: - // Advance `row_` until idx_ >= rowptr_[row_] && idx_ < rowptr_[row_+1] - void fast_forward_row() noexcept { - while (row_ < row_dim_ - 1 && idx_ >= rowptr_[row_ + 1]) { - row_++; - } - } - - // Given that `idx_` has just been retreated to an element - // possibly in a previous row, retreat `row_` to find the new row. - // That is: - // Retreat `row_` until idx_ >= rowptr_[row_] && idx_ < rowptr_[row_+1] - void fast_backward_row() noexcept { - while (idx_ < rowptr_[row_]) { - row_--; - } - } - - constexpr csr_matrix_view_accessor & - operator+=(difference_type offset) noexcept { - idx_ += offset; - if (offset < 0) { - fast_backward_row(); - } else { - fast_forward_row(); - } - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return idx_ == other.idx_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(idx_) - difference_type(other.idx_); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - return idx_ < other.idx_; - } - - constexpr reference operator*() const noexcept { - return reference( - key_type(row_ + idx_offset_[0], colind_[idx_] + idx_offset_[1]), - values_[idx_]); - } - -private: - TIter values_; - IIter rowptr_; - IIter colind_; - size_type idx_; - index_type row_; - size_type row_dim_; - key_type idx_offset_; -}; - -template -using csr_matrix_view_iterator = - dr::iterator_adaptor>; - -template -class csr_matrix_view - : public rng::view_interface> { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_reference = std::iter_reference_t; - using reference = dr::shp::matrix_ref; - - using scalar_type = T; - using index_type = I; - - using key_type = dr::index; - using map_type = T; - - using iterator = csr_matrix_view_iterator; - - csr_matrix_view(TIter values, IIter rowptr, IIter colind, key_type shape, - size_type nnz, size_type rank) - : values_(values), rowptr_(rowptr), colind_(colind), shape_(shape), - nnz_(nnz), rank_(rank), idx_offset_(key_type{0, 0}) {} - - csr_matrix_view(TIter values, IIter rowptr, IIter colind, key_type shape, - size_type nnz, size_type rank, key_type idx_offset) - : values_(values), rowptr_(rowptr), colind_(colind), shape_(shape), - nnz_(nnz), rank_(rank), idx_offset_(idx_offset) {} - - key_type shape() const noexcept { return shape_; } - - size_type size() const noexcept { return nnz_; } - - std::size_t rank() const { return rank_; } - - iterator begin() const { - return iterator(values_, rowptr_, colind_, 0, 0, shape()[1], idx_offset_); - } - - iterator end() const { - return iterator(values_, rowptr_, colind_, nnz_, shape()[1], shape()[1], - idx_offset_); - } - - auto row(I row_index) const { - I first = rowptr_[row_index]; - I last = rowptr_[row_index + 1]; - - TIter values = values_; - IIter colind = colind_; - - auto row_elements = rng::views::iota(first, last); - - return row_elements | rng::views::transform([=](auto idx) { - return reference(key_type(row_index, colind[idx]), values[idx]); - }); - } - - auto submatrix(key_type rows, key_type columns) const { - return rng::views::iota(rows[0], rows[1]) | - rng::views::transform([=, *this](auto &&row_index) { - return row(row_index) | rng::views::drop_while([=](auto &&e) { - auto &&[index, v] = e; - return index[1] < columns[0]; - }) | - rng::views::take_while([=](auto &&e) { - auto &&[index, v] = e; - return index[1] < columns[1]; - }) | - rng::views::transform([=](auto &&elem) { - auto &&[index, v] = elem; - auto &&[i, j] = index; - return reference(key_type(i - rows[0], j - columns[0]), - v); - }); - }) | - rng::views::join; - } - - auto values_data() const { return values_; } - - auto rowptr_data() const { return rowptr_; } - - auto colind_data() const { return colind_; } - -private: - TIter values_; - IIter rowptr_; - IIter colind_; - - key_type shape_; - size_type nnz_; - - size_type rank_; - key_type idx_offset_; -}; - -template -csr_matrix_view(TIter, IIter, IIter, Args &&...) - -> csr_matrix_view, std::iter_value_t, - TIter, IIter>; - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp deleted file mode 100644 index b67c5635cc8..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -namespace dr::shp { -template class dense_matrix_column_accessor { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_value_type = std::iter_value_t; - using scalar_reference = std::iter_reference_t; - - using value_type = dr::shp::matrix_entry; - - using reference = dr::shp::matrix_ref; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = dense_matrix_column_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - using key_type = dr::index<>; - - constexpr dense_matrix_column_accessor() noexcept = default; - constexpr ~dense_matrix_column_accessor() noexcept = default; - constexpr dense_matrix_column_accessor( - const dense_matrix_column_accessor &) noexcept = default; - constexpr dense_matrix_column_accessor & - operator=(const dense_matrix_column_accessor &) noexcept = default; - - constexpr dense_matrix_column_accessor(Iter data, std::size_t i, - std::size_t j, std::size_t ld) noexcept - : data_(data), i_(i), j_(j), ld_(ld) {} - - constexpr dense_matrix_column_accessor & - operator+=(difference_type offset) noexcept { - i_ += offset; - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return i_ == other.i_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(i_) - difference_type(other.i_); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - return i_ < other.i_; - } - - constexpr reference operator*() const noexcept { - return reference(key_type({i_, j_}), data_[i_ * ld_]); - } - -private: - size_type i_, j_; - size_type ld_; - - Iter data_; -}; - -template -using dense_matrix_column_iterator = - dr::iterator_adaptor>; - -template class dense_matrix_column_view { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_reference = std::iter_reference_t; - - using key_type = dr::index<>; - using map_type = T; - - using iterator = dense_matrix_column_iterator; - - dense_matrix_column_view(Iter data, size_type column_idx, size_type size, - size_type ld) - : data_(data), column_idx_(column_idx), size_(size), ld_(ld) {} - - scalar_reference operator[](size_type idx) { return data_[idx * ld_]; } - - iterator begin() const { return iterator(data_, 0, column_idx_, ld_); } - - iterator end() const { return iterator(data_, size_, column_idx_, ld_); } - - size_type size() const noexcept { return size_; } - - Iter data_; - size_type column_idx_; - size_type size_; - size_type ld_; -}; - -template -dense_matrix_column_view(Iter, std::size_t, std::size_t, std::size_t) - -> dense_matrix_column_view, Iter>; - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp deleted file mode 100644 index fb24b6e6963..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -#include -#include -#include -#include -#include - -namespace dr::shp { - -template class dense_matrix_accessor { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_type = std::iter_value_t; - using scalar_reference = std::iter_reference_t; - - using value_type = dr::shp::matrix_entry; - - using reference = dr::shp::matrix_ref; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = dense_matrix_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - using key_type = dr::index<>; - - constexpr dense_matrix_accessor() noexcept = default; - constexpr ~dense_matrix_accessor() noexcept = default; - constexpr dense_matrix_accessor(const dense_matrix_accessor &) noexcept = - default; - constexpr dense_matrix_accessor & - operator=(const dense_matrix_accessor &) noexcept = default; - - constexpr dense_matrix_accessor(Iter data, key_type idx, - key_type matrix_shape, size_type ld) noexcept - : data_(data), idx_(idx), matrix_shape_(matrix_shape), ld_(ld), - idx_offset_({0, 0}) {} - - constexpr dense_matrix_accessor(Iter data, key_type idx, key_type idx_offset, - key_type matrix_shape, size_type ld) noexcept - : data_(data), idx_(idx), matrix_shape_(matrix_shape), ld_(ld), - idx_offset_(idx_offset) {} - - constexpr dense_matrix_accessor &operator+=(difference_type offset) noexcept { - size_type new_idx = get_global_idx() + offset; - idx_ = {new_idx / matrix_shape_[1], new_idx % matrix_shape_[1]}; - - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return idx_ == other.idx_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(get_global_idx()) - other.get_global_idx(); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - if (idx_[0] < other.idx_[0]) { - return true; - } else if (idx_[0] == other.idx_[0]) { - return idx_[1] < other.idx_[1]; - } else { - return false; - } - } - - constexpr reference operator*() const noexcept { - return reference( - key_type(idx_[0] + idx_offset_[0], idx_[1] + idx_offset_[1]), - data_[idx_[0] * ld_ + idx_[1]]); - } - - Iter data() const noexcept { return data_; } - -private: - size_type get_global_idx() const noexcept { - return idx_[0] * matrix_shape_[1] + idx_[1]; - } - -private: - Iter data_; - key_type idx_; - key_type matrix_shape_; - size_type ld_; - - key_type idx_offset_; -}; - -template -using dense_matrix_iterator = - dr::iterator_adaptor>; - -template -using dense_matrix_view_iterator = dense_matrix_iterator; - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp deleted file mode 100644 index 72a308a2679..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp +++ /dev/null @@ -1,124 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace dr::shp { - -template -class dense_matrix_view - : public rng::view_interface> { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_reference = std::iter_reference_t; - using reference = dr::shp::matrix_ref; - - using key_type = dr::index<>; - using map_type = T; - - using iterator = dense_matrix_view_iterator; - - dense_matrix_view(Iter data, key_type shape, size_type ld, size_type rank) - : data_(data), shape_(shape), idx_offset_(key_type{0, 0}), ld_(ld), - rank_(rank) {} - - dense_matrix_view(Iter data, key_type shape, key_type idx_offset, - size_type ld, size_type rank) - : data_(data), shape_(shape), idx_offset_(idx_offset), ld_(ld), - rank_(rank) {} - - template - requires(std::is_same_v::pointer, - Iter>) - dense_matrix_view(dense_matrix &m) - : data_(m.data()), shape_(m.shape()), idx_offset_(key_type{0, 0}), - ld_(m.ld()), rank_(0) {} - - key_type shape() const noexcept { return shape_; } - - size_type size() const noexcept { return shape()[0] * shape()[1]; } - - scalar_reference operator[](key_type idx) const { - return data_[idx[0] * ld_ + idx[1]]; - } - - iterator begin() const { - return iterator(data_, key_type{0, 0}, idx_offset_, shape_, ld_); - } - - iterator end() const { - return iterator(data_, key_type{shape_[0], 0}, idx_offset_, shape_, ld_); - } - - auto row(size_type row_index) const { - // return dense_matrix_row_view(data_ + row_index * ld_, row_index, - // shape()[1]); - auto row_elements = rng::views::iota(size_type(0), size_type(shape()[1])); - Iter data = data_ + row_index * ld_; - - return row_elements | rng::views::transform([=](auto column_index) { - return reference(key_type(row_index, column_index), - data[column_index]); - }); - } - - auto column(size_type column_index) const { - // return dense_matrix_column_view(data_ + column_index, column_index, - // shape()[0], ld_); - auto column_elements = - rng::views::iota(size_type(0), size_type(shape()[0])); - Iter data = data_ + column_index; - size_type ld = ld_; - - return column_elements | rng::views::transform([=](auto row_index) { - return reference(key_type(row_index, column_index), - data[row_index * ld]); - }); - } - - Iter data() const { return data_; } - - std::size_t rank() const { return rank_; } - - size_type ld() const { return ld_; } - - auto local() const { - auto local_data = __detail::local(data_); - return dense_matrix_view( - local_data, shape_, idx_offset_, ld(), rank()); - } - -private: - Iter data_; - key_type shape_; - key_type idx_offset_; - size_type ld_; - size_type rank_; -}; - -template -dense_matrix_view(Iter, dr::index<>, std::size_t) - -> dense_matrix_view, Iter>; - -template -dense_matrix_view(Iter, dr::index<>) - -> dense_matrix_view, Iter>; - -template -dense_matrix_view(dense_matrix &) - -> dense_matrix_view::pointer>; - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp deleted file mode 100644 index 5501e249cf8..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include -#include - -namespace dr::shp { -template class dense_matrix_row_accessor { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_value_type = std::iter_value_t; - using scalar_reference = std::iter_reference_t; - - using value_type = dr::shp::matrix_entry; - - using reference = dr::shp::matrix_ref; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = dense_matrix_row_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - using key_type = dr::index<>; - - constexpr dense_matrix_row_accessor() noexcept = default; - constexpr ~dense_matrix_row_accessor() noexcept = default; - constexpr dense_matrix_row_accessor( - const dense_matrix_row_accessor &) noexcept = default; - constexpr dense_matrix_row_accessor & - operator=(const dense_matrix_row_accessor &) noexcept = default; - - constexpr dense_matrix_row_accessor(Iter data, std::size_t i, - std::size_t j) noexcept - : data_(data), i_(i), j_(j) {} - - constexpr dense_matrix_row_accessor & - operator+=(difference_type offset) noexcept { - j_ += offset; - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return j_ == other.j_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(j_) - difference_type(other.j_); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - return j_ < other.j_; - } - - constexpr reference operator*() const noexcept { - return reference(key_type({i_, j_}), data_[j_]); - } - -private: - size_type i_, j_; - - Iter data_; -}; - -template -using dense_matrix_row_iterator = - dr::iterator_adaptor>; - -template class dense_matrix_row_view { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_reference = std::iter_reference_t; - - using key_type = dr::index<>; - using map_type = T; - - using iterator = dense_matrix_row_iterator; - - dense_matrix_row_view(Iter data, size_type row_idx, size_type size) - : data_(data), row_idx_(row_idx), size_(size) {} - - scalar_reference operator[](size_type idx) { return data_[idx]; } - - iterator begin() const { return iterator(data_, row_idx_, 0); } - - iterator end() const { return iterator(data_, row_idx_, size_); } - - size_type size() const noexcept { return size_; } - - Iter data_; - size_type row_idx_; - size_type size_; -}; - -template -dense_matrix_row_view(Iter, std::size_t, std::size_t) - -> dense_matrix_row_view, Iter>; - -} // namespace dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp index b2518a8ea6c..2818d82cd4c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp @@ -4,9 +4,9 @@ #pragma once -#include +#include -namespace dr::shp { +namespace experimental::dr::shp { namespace views { @@ -30,7 +30,7 @@ class enumerate_adapter_closure { requires(rng::sized_range) auto operator()(R &&r) const { using W = std::uint32_t; - return dr::shp::zip_view(rng::views::iota(W(0), W(rng::size(r))), + return experimental::dr::shp::zip_view(rng::views::iota(W(0), W(rng::size(r))), std::forward(r)); } @@ -53,4 +53,4 @@ inline constexpr auto enumerate = enumerate_fn_{}; } // namespace views -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp index 1b4ba1aafe7..e5f14ae6c38 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp @@ -4,26 +4,28 @@ #pragma once -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include -namespace dr::shp { +namespace experimental::dr::shp { namespace views { -template -auto slice(R &&r, dr::index<> slice_indices) { - return dr::shp::distributed_span(dr::ranges::segments(std::forward(r))) +template +auto slice(R &&r, experimental::dr::index<> slice_indices) { + return experimental::dr::shp::distributed_span( + experimental::dr::ranges::segments(std::forward(r))) .subspan(slice_indices[0], slice_indices[1] - slice_indices[0]); } class slice_adaptor_closure { public: - slice_adaptor_closure(dr::index<> slice_indices) : idx_(slice_indices) {} + slice_adaptor_closure(experimental::dr::index<> slice_indices) + : idx_(slice_indices) {} template auto operator()(R &&r) const { return slice(std::forward(r), idx_); @@ -35,13 +37,13 @@ class slice_adaptor_closure { } private: - dr::index<> idx_; + experimental::dr::index<> idx_; }; -inline auto slice(dr::index<> slice_indices) { +inline auto slice(experimental::dr::index<> slice_indices) { return slice_adaptor_closure(slice_indices); } } // namespace views -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp index 0a66aa5f010..3b763c6a897 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp @@ -4,12 +4,12 @@ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include -namespace dr::shp::views { +namespace experimental::dr::shp::views { inline constexpr auto all = rng::views::all; @@ -17,10 +17,10 @@ inline constexpr auto counted = rng::views::counted; inline constexpr auto drop = rng::views::drop; -inline constexpr auto iota = dr::views::iota; +inline constexpr auto iota = experimental::dr::views::iota; inline constexpr auto take = rng::views::take; -inline constexpr auto transform = dr::views::transform; +inline constexpr auto transform = experimental::dr::views::transform; -} // namespace dr::shp::views +} // namespace experimental::dr::shp::views diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp index c24b0b2f632..ff556646000 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp @@ -6,13 +6,13 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include -namespace dr { +namespace experimental::dr { template struct is_owning_view : std::false_type {}; // template @@ -21,9 +21,9 @@ template struct is_owning_view : std::false_type {}; template inline constexpr bool is_owning_view_v = is_owning_view{}; -}; // namespace dr +}; // namespace experimental::dr -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { @@ -101,7 +101,7 @@ template class zip_accessor { }; template -using zip_iterator = dr::iterator_adaptor>; +using zip_iterator = experimental::dr::iterator_adaptor>; /// zip template @@ -137,8 +137,8 @@ class zip_view : public rng::view_interface> { template decltype(auto) get_view() const { auto &&view = std::get(views_); - if constexpr (dr::is_ref_view_v> || - dr::is_owning_view_v>) { + if constexpr (experimental::dr::is_ref_view_v> || + experimental::dr::is_owning_view_v>) { return view.base(); } else { return view; @@ -148,7 +148,7 @@ class zip_view : public rng::view_interface> { // If there is at least one distributed range, expose segments // of overlapping remote ranges. auto segments() const - requires(dr::distributed_range || ...) + requires(experimental::dr::distributed_range || ...) { std::array segment_ids; std::array local_idx; @@ -178,14 +178,14 @@ class zip_view : public rng::view_interface> { increment_local_idx(segment_ids, local_idx, size); } - return dr::__detail::owning_view(std::move(segment_views)); + return experimental::dr::__detail::owning_view(std::move(segment_views)); } // Return a range corresponding to each segment in `segments()`, // but with a tuple of the constituent ranges instead of a // `zip_view` of the ranges. auto zipped_segments() const - requires(dr::distributed_range || ...) + requires(experimental::dr::distributed_range || ...) { std::array segment_ids; std::array local_idx; @@ -214,11 +214,11 @@ class zip_view : public rng::view_interface> { increment_local_idx(segment_ids, local_idx, size); } - return dr::__detail::owning_view(std::move(segment_views)); + return experimental::dr::__detail::owning_view(std::move(segment_views)); } auto local() const noexcept - requires(!(dr::distributed_range || ...)) + requires(!(experimental::dr::distributed_range || ...)) { return local_impl_(std::make_index_sequence()); } @@ -228,8 +228,8 @@ class zip_view : public rng::view_interface> { // - There are no distributed ranges in the zip // Expose a rank. std::size_t rank() const - requires((dr::remote_range || ...) && - !(dr::distributed_range || ...)) + requires((experimental::dr::remote_range || ...) && + !(experimental::dr::distributed_range || ...)) { return get_rank_impl_<0, Rs...>(); } @@ -242,25 +242,25 @@ class zip_view : public rng::view_interface> { template std::size_t get_rank_impl_() const { static_assert(I < sizeof...(Rs)); - return dr::ranges::rank(get_view()); + return experimental::dr::ranges::rank(get_view()); } template requires(sizeof...(Rs_) > 0) std::size_t get_rank_impl_() const { static_assert(I < sizeof...(Rs)); - if constexpr (dr::remote_range) { - return dr::ranges::rank(get_view()); + if constexpr (experimental::dr::remote_range) { + return experimental::dr::ranges::rank(get_view()); } else { return get_rank_impl_(); } } template auto create_view_impl_(T &&t) const { - if constexpr (dr::remote_range) { - return dr::shp::device_span(std::forward(t)); + if constexpr (experimental::dr::remote_range) { + return experimental::dr::shp::device_span(std::forward(t)); } else { - return dr::shp::span(std::forward(t)); + return experimental::dr::shp::span(std::forward(t)); } } @@ -307,9 +307,9 @@ class zip_view : public rng::view_interface> { rng::begin(std::get(views_))...); } - template + template decltype(auto) segment_or_orig_(T &&t, std::size_t idx) const { - return dr::ranges::segments(t)[idx]; + return experimental::dr::ranges::segments(t)[idx]; } template @@ -341,9 +341,9 @@ namespace views { /// Zip template auto zip(Rs &&...rs) { - return dr::shp::zip_view(std::forward(rs)...); + return experimental::dr::shp::zip_view(std::forward(rs)...); } } // namespace views -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/source_location/source_location.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/source_location/source_location.hpp deleted file mode 100644 index a6bd21b9c78..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/source_location/source_location.hpp +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#ifndef NOSTD_SOURCE_LOCATION_HPP -#define NOSTD_SOURCE_LOCATION_HPP - -#pragma once - -#include - -namespace nostd { -struct source_location { -public: -#if not defined(__apple_build_version__) and defined(__clang__) and \ - (__clang_major__ >= 9) - static constexpr source_location - current(const char *fileName = __builtin_FILE(), - const char *functionName = __builtin_FUNCTION(), - const uint_least32_t lineNumber = __builtin_LINE(), - const uint_least32_t columnOffset = __builtin_COLUMN()) noexcept -#elif defined(__GNUC__) and \ - (__GNUC__ > 4 or (__GNUC__ == 4 and __GNUC_MINOR__ >= 8)) - static constexpr source_location - current(const char *fileName = __builtin_FILE(), - const char *functionName = __builtin_FUNCTION(), - const uint_least32_t lineNumber = __builtin_LINE(), - const uint_least32_t columnOffset = 0) noexcept -#else - static constexpr source_location - current(const char *fileName = "unsupported", - const char *functionName = "unsupported", - const uint_least32_t lineNumber = 0, - const uint_least32_t columnOffset = 0) noexcept -#endif - { - return source_location(fileName, functionName, lineNumber, columnOffset); - } - - source_location(const source_location &) = default; - source_location(source_location &&) = default; - - constexpr const char *file_name() const noexcept { return fileName; } - - constexpr const char *function_name() const noexcept { return functionName; } - - constexpr uint_least32_t line() const noexcept { return lineNumber; } - - constexpr std::uint_least32_t column() const noexcept { return columnOffset; } - -private: - constexpr source_location(const char *fileName, const char *functionName, - const uint_least32_t lineNumber, - const uint_least32_t columnOffset) noexcept - : fileName(fileName), functionName(functionName), lineNumber(lineNumber), - columnOffset(columnOffset) {} - - const char *fileName; - const char *functionName; - const std::uint_least32_t lineNumber; - const std::uint_least32_t columnOffset; -}; -} // namespace nostd - -#endif diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp index 3301bfa0c1e..82a227ef445 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp @@ -4,7 +4,7 @@ #pragma once -namespace dr::views { +namespace experimental::dr::views { // // range-v3 iota uses sentinels that are not the same type as the @@ -24,4 +24,4 @@ struct iota_fn_ { inline constexpr auto iota = iota_fn_{}; -} // namespace dr::views +} // namespace experimental::dr::views diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp index af59ab70eaa..d8a3a23bfc9 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp @@ -8,10 +8,10 @@ #include #include -#include -#include +#include +#include -namespace dr { +namespace experimental::dr { template class transform_iterator { @@ -106,9 +106,9 @@ class transform_iterator { } auto local() const - requires(dr::ranges::__detail::has_local) + requires(experimental::dr::ranges::__detail::has_local) { - auto iter = dr::ranges::__detail::local(iter_); + auto iter = experimental::dr::ranges::__detail::local(iter_); return transform_iterator(iter, fn_); } @@ -135,10 +135,10 @@ class transform_view : public rng::view_interface> { } auto segments() const - requires(dr::distributed_range) + requires(experimental::dr::distributed_range) { auto fn = fn_; - return dr::ranges::segments(base_) | + return experimental::dr::ranges::segments(base_) | rng::views::transform([fn](T &&segment) { return transform_view, F>( std::forward(segment), fn); @@ -146,9 +146,9 @@ class transform_view : public rng::view_interface> { } auto rank() const - requires(dr::remote_range) + requires(experimental::dr::remote_range) { - return dr::ranges::rank(base_); + return experimental::dr::ranges::rank(base_); } V base() const { return base_; } @@ -168,7 +168,7 @@ template class transform_adapter_closure { transform_adapter_closure(F fn) : fn_(fn) {} template auto operator()(R &&r) const { - return dr::transform_view(std::forward(r), fn_); + return experimental::dr::transform_view(std::forward(r), fn_); } template @@ -195,13 +195,13 @@ class transform_fn_ { inline constexpr auto transform = transform_fn_{}; } // namespace views -} // namespace dr +} // namespace experimental::dr #if !defined(DR_SPEC) // Needed to satisfy rng::viewable_range template -inline constexpr bool rng::enable_borrowed_range> = +inline constexpr bool rng::enable_borrowed_range> = true; #endif diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp index 556beaba39e..6ba6ef64806 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp @@ -4,15 +4,15 @@ #pragma once -#include -#include +#include +#include -namespace dr { +namespace experimental::dr { // returns range: [(rank, element) ...] -auto ranked_view(const dr::distributed_range auto &r) { - auto rank = [](auto &&v) { return dr::ranges::rank(&v); }; +auto ranked_view(const experimental::dr::distributed_range auto &r) { + auto rank = [](auto &&v) { return experimental::dr::ranges::rank(&v); }; return rng::views::zip(rng::views::transform(r, rank), r); } -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h index 79d8239ea12..cd33872aa0d 100644 --- a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h @@ -37,12 +37,15 @@ class ExclusiveScan1; template class ExclusiveScan2; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op) +template +OutputIterator +pattern_exclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, + Operator binary_op) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + const auto n = ::std::distance(first1, last1); // Check for empty and single element ranges @@ -61,7 +64,7 @@ pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputI InputIterator2 last2 = first2 + n; // compute head flags - oneapi::dpl::__par_backend::__buffer _flags(n); + oneapi::dpl::__par_backend::__buffer _flags(policy, n); auto flags = _flags.get(); flags[0] = 1; @@ -69,7 +72,7 @@ pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputI oneapi::dpl::__internal::__not_pred(binary_pred)); // shift input one to the right and initialize segments with init - oneapi::dpl::__par_backend::__buffer _temp(n); + oneapi::dpl::__par_backend::__buffer _temp(policy, n); auto temp = _temp.get(); temp[0] = init; @@ -91,22 +94,24 @@ pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputI } #if _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -exclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, +template +OutputIterator +exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op, ::std::true_type /* has_known_identity*/) { - return internal::__scan_by_segment_impl_common(::std::forward(policy), first1, last1, first2, result, init, - binary_pred, binary_op, ::std::false_type{}); + return internal::__scan_by_segment_impl_common(__tag, ::std::forward(policy), first1, last1, first2, result, + init, binary_pred, binary_op, ::std::false_type{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -exclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, +template +OutputIterator +exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op, ::std::false_type /* has_known_identity*/) { @@ -160,14 +165,15 @@ exclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIter return result + n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op) +template +OutputIterator +pattern_exclusive_scan_by_segment(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op) { return internal::exclusive_scan_by_segment_impl( - ::std::forward(policy), first1, last1, first2, result, init, binary_pred, binary_op, + __tag, ::std::forward(policy), first1, last1, first2, result, init, binary_pred, binary_op, typename unseq_backend::__has_known_identity< Operator, typename ::std::iterator_traits::value_type>::type{}); } @@ -181,8 +187,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op) { - return internal::pattern_exclusive_scan_by_segment(::std::forward(policy), first1, last1, first2, result, - init, binary_pred, binary_op); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, first1, first2, result); + + return internal::pattern_exclusive_scan_by_segment(__dispatch_tag, ::std::forward(policy), first1, last1, + first2, result, init, binary_pred, binary_op); } template class InclusiveScan1; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -pattern_inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) +OutputIterator +pattern_inclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, + BinaryOperator binary_op) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + const auto n = ::std::distance(first1, last1); // Check for empty and single element ranges @@ -56,7 +59,7 @@ pattern_inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputI typedef unsigned int FlagType; typedef typename ::std::iterator_traits::value_type ValueType; - oneapi::dpl::__par_backend::__buffer _mask(n); + oneapi::dpl::__par_backend::__buffer _mask(policy, n); auto mask = _mask.get(); mask[0] = 1; @@ -72,24 +75,26 @@ pattern_inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputI } #if _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -inclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op, +template +OutputIterator +inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, + BinaryPredicate binary_pred, BinaryOperator binary_op, ::std::true_type /* has_known_identity */) { using iter_value_t = typename ::std::iterator_traits::value_type; iter_value_t identity = unseq_backend::__known_identity; - return internal::__scan_by_segment_impl_common(::std::forward(policy), first1, last1, first2, result, + return internal::__scan_by_segment_impl_common(__tag, ::std::forward(policy), first1, last1, first2, result, identity, binary_pred, binary_op, ::std::true_type{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -inclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op, +template +OutputIterator +inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, + BinaryPredicate binary_pred, BinaryOperator binary_op, ::std::false_type /* has_known_identity */) { @@ -123,14 +128,15 @@ inclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIter return result + n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -pattern_inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) +template +OutputIterator +pattern_inclusive_scan_by_segment(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, + BinaryPredicate binary_pred, BinaryOperator binary_op) { return internal::inclusive_scan_by_segment_impl( - ::std::forward(policy), first1, last1, first2, result, binary_pred, binary_op, + __tag, ::std::forward(policy), first1, last1, first2, result, binary_pred, binary_op, typename unseq_backend::__has_known_identity< BinaryOperator, typename ::std::iterator_traits::value_type>::type{}); } @@ -144,8 +150,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) { - return internal::pattern_inclusive_scan_by_segment(::std::forward(policy), first1, last1, first2, result, - binary_pred, binary_op); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, first1, first2, result); + + return internal::pattern_inclusive_scan_by_segment(__dispatch_tag, ::std::forward(policy), first1, last1, + first2, result, binary_pred, binary_op); } template diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index 0102d6c2925..683decf3a49 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -59,6 +59,7 @@ #include "../pstl/ranges_defs.h" #include "../pstl/glue_algorithm_ranges_defs.h" #include "../pstl/glue_algorithm_ranges_impl.h" +#include "../pstl/hetero/dpcpp/sycl_traits.h" //SYCL traits specialization for some oneDPL types. #include "scan_by_segment_impl.h" #endif @@ -78,13 +79,15 @@ class Reduce3; template class Reduce4; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy> -reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, +::std::pair +reduce_by_segment_impl(_Tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, BinaryOperator binary_op) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + // The algorithm reduces values in [first2, first2 + (last1-first1)) where the associated // keys for the values are equal to the adjacent key. This function's implementation is a derivative work // and responsible for the second copyright notice in this header. @@ -112,7 +115,7 @@ reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 la // buffer that is used to store a flag indicating if the associated key is not equal to // the next key, and thus its associated sum should be part of the final result - oneapi::dpl::__par_backend::__buffer _mask(n + 1); + oneapi::dpl::__par_backend::__buffer _mask(policy, n + 1); auto mask = _mask.get(); mask[0] = 1; @@ -128,11 +131,11 @@ reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 la // buffer stores the sums of values associated with a given key. Sums are copied with // a shift into result2, and the shift is computed at the same time as the sums, so the // sums can't be written to result2 directly. - oneapi::dpl::__par_backend::__buffer _scanned_values(n); + oneapi::dpl::__par_backend::__buffer _scanned_values(policy, n); // Buffer is used to store results of the scan of the mask. Values indicate which position // in result2 needs to be written with the scanned_values element. - oneapi::dpl::__par_backend::__buffer _scanned_tail_flags(n); + oneapi::dpl::__par_backend::__buffer _scanned_tail_flags(policy, n); // Compute the sum of the segments. scanned_tail_flags values are not used. inclusive_scan(policy, make_zip_iterator(first2, _mask.get()), make_zip_iterator(first2, _mask.get()) + n, @@ -188,12 +191,12 @@ template using _SegReducePrefixPhase = __seg_reduce_prefix_kernel<_Name...>; } // namespace -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range3>> -__sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, - _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, +template +oneapi::dpl::__internal::__difference_t<_Range3> +__sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, + _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, + _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, ::std::false_type /* has_known_identity */) { return oneapi::dpl::experimental::ranges::reduce_by_segment( @@ -201,12 +204,12 @@ __sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& ::std::forward<_Range3>(__out_keys), ::std::forward<_Range4>(__out_values), __binary_pred, __binary_op); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range3>> -__sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, - _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, +template +oneapi::dpl::__internal::__difference_t<_Range3> +__sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, + _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, + _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, ::std::true_type /* has_known_identity */) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -570,12 +573,12 @@ __sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& return __end_idx.get_host_access()[0] + 1; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy> -reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, - BinaryOperator binary_op) +template +::std::pair +reduce_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator1 result1, OutputIterator2 result2, + BinaryPred binary_pred, BinaryOperator binary_op) { // The algorithm reduces values in [first2, first2 + (last1-first1)) where the associated // keys for the values are equal to the adjacent key. @@ -609,9 +612,9 @@ reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 la typename ::std::iterator_traits::value_type>::type; // number of unique keys - _CountType __n = __sycl_reduce_by_segment(::std::forward(policy), key_buf.all_view(), value_buf.all_view(), - key_output_buf.all_view(), value_output_buf.all_view(), binary_pred, - binary_op, has_known_identity{}); + _CountType __n = __sycl_reduce_by_segment( + __tag, ::std::forward(policy), key_buf.all_view(), value_buf.all_view(), key_output_buf.all_view(), + value_output_buf.all_view(), binary_pred, binary_op, has_known_identity{}); return ::std::make_pair(result1 + __n, result2 + __n); } @@ -624,8 +627,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy(policy), first1, last1, first2, result1, result2, - binary_pred, binary_op); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, first1, first2, result1, result2); + + return internal::reduce_by_segment_impl(__dispatch_tag, ::std::forward(policy), first1, last1, first2, + result1, result2, binary_pred, binary_op); } template using _SegScanPrefixPhase = __seg_scan_prefix_kernel<__is_inclusive, _Name...>; - template void - operator()(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_values, + operator()(_BackendTag, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, _T __init, _T __identity) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -364,11 +366,12 @@ struct __sycl_scan_by_segment_impl } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -__scan_by_segment_impl_common(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, Inclusive) +template +OutputIterator +__scan_by_segment_impl_common(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op, Inclusive) { const auto n = ::std::distance(first1, last1); @@ -389,7 +392,7 @@ __scan_by_segment_impl_common(Policy&& policy, InputIterator1 first1, InputItera constexpr iter_value_t identity = unseq_backend::__known_identity; - __sycl_scan_by_segment_impl()(::std::forward(policy), key_buf.all_view(), + __sycl_scan_by_segment_impl()(_BackendTag{}, ::std::forward(policy), key_buf.all_view(), value_buf.all_view(), value_output_buf.all_view(), binary_pred, binary_op, init, identity); return result + n; diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 34d09140fb0..556490c8d19 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -26,6 +26,11 @@ namespace dpl namespace __internal { +template +struct __parallel_tag; + +struct __parallel_forward_tag; + //------------------------------------------------------------------------ // any_of //------------------------------------------------------------------------ @@ -40,15 +45,13 @@ bool __brick_any_of(const _RandomAccessIterator, const _RandomAccessIterator, _Pred, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred, _IsVector, - /*parallel=*/::std::false_type) noexcept; +template +bool +__pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred, _IsVector, - /*parallel=*/::std::true_type); +template +bool +__pattern_any_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred); //------------------------------------------------------------------------ // walk1 (pseudo) @@ -64,34 +67,26 @@ template void __brick_walk1(_RandomAccessIterator, _RandomAccessIterator, _Function, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk1(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; +template +void +__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::true_type); +template +void +__pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_RandomAccessIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::true_type); +template +void +__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick, - /*parallel=*/::std::false_type) noexcept; +template +void +__pattern_walk_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Brick, - /*parallel=*/::std::true_type); +template +void +__pattern_walk_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Brick); //------------------------------------------------------------------------ // walk1_n @@ -105,25 +100,21 @@ template _RandomAccessIterator __brick_walk1_n(_RandomAccessIterator, _DifferenceType, _Function, /*vectorTag=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk1_n(_ExecutionPolicy&&, _ForwardIterator, _Size, _Function, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk1_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, _Function, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_walk1_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk_brick_n(_ExecutionPolicy&&, _ForwardIterator, _Size, _Brick, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_walk_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Brick) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk_brick_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, _Brick, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_walk_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Brick); //------------------------------------------------------------------------ // walk2 (pseudo) @@ -147,65 +138,58 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_n(_ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, _Function, _IsVector, - /*parallel=*/::std::true_type); +template +_ForwardIterator2 +__pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Function); + +template +_ForwardIterator2 +__pattern_walk2(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _Function); + +template +_ForwardIterator2 +__pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Function) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, + _Function); + +template +_ForwardIterator2 +__pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _Brick) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Brick); template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Brick, - /*parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick, - /*parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, _Brick, - /*parallel=*/::std::true_type); +_ForwardIterator2 +__pattern_walk2_brick(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, + _ForwardIterator2, _Brick); + +template +_ForwardIterator2 +__pattern_walk2_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size, + _RandomAccessIterator2, _Brick); //------------------------------------------------------------------------ // walk3 (pseudo) @@ -222,50 +206,39 @@ _RandomAccessIterator3 __brick_walk3(_RandomAccessIterator1, _RandomAccessIterat _RandomAccessIterator3, _Function, /*vector=*/::std::true_type) noexcept; +template +_ForwardIterator3 +__pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3, + _Function) noexcept; + +template +_RandomAccessIterator3 +__pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator3, _Function); + template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3, - _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, - __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>, - _RandomAccessIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, - !__is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>, - _RandomAccessIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::true_type); + class _Function> +_ForwardIterator3 +__pattern_walk3(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator3, _Function); //------------------------------------------------------------------------ // transform_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __func, _IsVector __is_vector, - _IsParallel __is_parallel) noexcept; +template +_ForwardIterator2 +__pattern_walk2_transform_if(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _Function) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __func, - _IsVector __is_vector, _IsParallel __is_parallel) noexcept; +template +_ForwardIterator3 +__pattern_walk3_transform_if(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator3, _Function) noexcept; //------------------------------------------------------------------------ // equal @@ -279,17 +252,16 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _BinaryPredicate, - _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::true_type); +template +bool +__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); template bool __brick_equal(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate, @@ -299,17 +271,16 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::true_type); +template +bool +__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _BinaryPredicate); //------------------------------------------------------------------------ // find_if @@ -323,15 +294,14 @@ template _RandomAccessIterator __brick_find_if(_RandomAccessIterator, _RandomAccessIterator, _Predicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_find_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_find_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_find_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Predicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_find_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Predicate); //------------------------------------------------------------------------ // find_end @@ -347,19 +317,16 @@ _RandomAccessIterator1 __brick_find_end(_RandomAccessIterator1, _RandomAccessIte _RandomAccessIterator2, _BinaryPredicate, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_end(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator1 +__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_find_end(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator1 +__pattern_find_end(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); //------------------------------------------------------------------------ // find_first_of @@ -375,17 +342,16 @@ _RandomAccessIterator1 __brick_find_first_of(_RandomAccessIterator1, _RandomAcce _RandomAccessIterator2, _BinaryPredicate, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_first_of(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator1 +__pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_find_first_of(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator1 +__pattern_find_first_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); //------------------------------------------------------------------------ // search @@ -401,19 +367,16 @@ _RandomAccessIterator1 __brick_search(_RandomAccessIterator1, _RandomAccessItera _RandomAccessIterator2, _BinaryPredicate, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_search(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator1 +__pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_search(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator1 +__pattern_search(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); //------------------------------------------------------------------------ // search_n @@ -429,31 +392,28 @@ _RandomAccessIterator __brick_search_n(_RandomAccessIterator, _RandomAccessIterator, _Size, const _Tp&, _BinaryPredicate, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_search_n(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Size, const _Tp&, _BinaryPredicate, - IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Size, const _Tp&, + _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_search_n(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Size, const _Tp&, - _BinaryPredicate, IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_search_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Size, + const _Tp&, _BinaryPredicate); //------------------------------------------------------------------------ // copy_n //------------------------------------------------------------------------ -template +template struct __brick_copy_n; -template +template struct __brick_copy; -template +template struct __brick_move; //------------------------------------------------------------------------ @@ -510,16 +470,16 @@ void __brick_partition_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator1, _OutputIterator2, bool*, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_copy_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryPredicate, _IsVector, - /*parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_copy_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _UnaryPredicate, - _IsVector, /*parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _UnaryPredicate); //------------------------------------------------------------------------ // count @@ -535,17 +495,14 @@ typename ::std::iterator_traits<_ForwardIterator>::difference_type __brick_count(_ForwardIterator, _ForwardIterator, _Predicate, /* is_vector = */ ::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> -__pattern_count(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate, - /* is_parallel */ ::std::false_type, _IsVector) noexcept; +template +typename ::std::iterator_traits<_ForwardIterator>::difference_type +__pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_RandomAccessIterator>::difference_type> -__pattern_count(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Predicate, - /* is_parallel */ ::std::true_type, _IsVector); +template +typename ::std::iterator_traits<_RandomAccessIterator>::difference_type +__pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Predicate); //------------------------------------------------------------------------ // unique @@ -559,15 +516,14 @@ template _RandomAccessIterator __brick_unique(_RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_unique(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_unique(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_unique(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _BinaryPredicate); //------------------------------------------------------------------------ // unique_copy @@ -581,11 +537,10 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_unique_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryPredicate, - _IsVector, /*parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _BinaryPredicate) noexcept; template _DifferenceType @@ -597,11 +552,11 @@ _DifferenceType __brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_unique_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _BinaryPredicate, _IsVector, /*parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _BinaryPredicate); //------------------------------------------------------------------------ // reverse @@ -623,15 +578,13 @@ template void __brick_reverse(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _IsVector, - /*is_parallel=*/::std::true_type); +template +void +__pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator); //------------------------------------------------------------------------ // reverse_copy @@ -645,15 +598,15 @@ template _OutputIterator __brick_reverse_copy(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_reverse_copy(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _OutputIterator, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _OutputIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_reverse_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2); //------------------------------------------------------------------------ // rotate @@ -667,40 +620,36 @@ template _RandomAccessIterator __brick_rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_rotate(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_rotate(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_rotate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator); //------------------------------------------------------------------------ // rotate_copy //------------------------------------------------------------------------ -template -_OutputIterator __brick_rotate_copy(_ForwardIterator, _ForwardIterator, _ForwardIterator, _OutputIterator, - /*__is_vector=*/::std::false_type) noexcept; +template +_OutputIterator __brick_rotate_copy(_Tag, _ForwardIterator, _ForwardIterator, _ForwardIterator, + _OutputIterator) noexcept; -template -_OutputIterator __brick_rotate_copy(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _OutputIterator, - /*__is_vector=*/::std::true_type) noexcept; +template +_OutputIterator __brick_rotate_copy(__parallel_tag<_IsVector>, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _OutputIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_rotate_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, _OutputIterator, - _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_rotate_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, + _OutputIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_rotate_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _OutputIterator, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2); //------------------------------------------------------------------------ // is_partitioned @@ -714,15 +663,14 @@ template bool __brick_is_partitioned(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +bool +__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +bool +__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); //------------------------------------------------------------------------ // partition @@ -736,15 +684,14 @@ template _RandomAccessIterator __brick_partition(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_partition(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_partition(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); //------------------------------------------------------------------------ // stable_partition @@ -758,16 +705,15 @@ template _RandomAccessIterator __brick_stable_partition(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_stable_partition(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _UnaryPredicate, - _IsVector, - /*is_parallelization=*/::std::false_type) noexcept; +template +_BidirectionalIterator +__pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_stable_partition(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallelization=*/::std::true_type); +template +_RandomAccessIterator +__pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); //------------------------------------------------------------------------ // partition_copy @@ -784,104 +730,88 @@ ::std::pair<_OutputIterator1, _OutputIterator2> __brick_partition_copy(_RandomAc _UnaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_OutputIterator1, _OutputIterator2>> -__pattern_partition_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator1, _OutputIterator2, - _UnaryPredicate, _IsVector, - /*is_parallelization=*/::std::false_type) noexcept; +template +::std::pair<_OutputIterator1, _OutputIterator2> +__pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator1, + _OutputIterator2, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_OutputIterator1, _OutputIterator2>> -__pattern_partition_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator1, - _OutputIterator2, _UnaryPredicate, _IsVector, - /*is_parallelization=*/::std::true_type); +template +::std::pair<_RandomAccessIterator2, _RandomAccessIterator3> +__pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator3, _UnaryPredicate); //------------------------------------------------------------------------ // sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector /*is_vector*/, - /*is_parallel=*/::std::false_type, _IsMoveConstructible) noexcept; +template +void +__pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, + _IsMoveConstructible) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector /*is_vector*/, - /*is_parallel=*/::std::true_type, +template +void +__pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, /*is_move_constructible=*/::std::true_type); //------------------------------------------------------------------------ // stable_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector /*is_vector*/, - /*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector /*is_vector*/, - /*is_parallel=*/::std::true_type); +template +void +__pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); //------------------------------------------------------------------------ // sort_by_key //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::true_type); +template +void +__pattern_sort_by_key(_Tag, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _Compare) noexcept; + +template +void +__pattern_sort_by_key(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Compare); //------------------------------------------------------------------------ // partial_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +void +__pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); //------------------------------------------------------------------------ // partial_sort_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_partial_sort_copy(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_RandomAccessIterator +__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_partial_sort_copy(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare) noexcept; //------------------------------------------------------------------------ // adjacent_find @@ -897,59 +827,54 @@ _ForwardIterator __brick_adjacent_find(_ForwardIterator, _ForwardIterator, _BinaryPredicate, /* IsVector = */ ::std::false_type, bool) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_adjacent_find(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, - /* is_parallel */ ::std::false_type, _IsVector, _Semantic) noexcept; +template +_ForwardIterator +__pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, + _Semantic) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_adjacent_find(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, - /* is_parallel */ ::std::true_type, _IsVector, _Semantic); +template +_RandomAccessIterator +__pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _BinaryPredicate, _Semantic); //------------------------------------------------------------------------ // nth_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector, - /*is_parallel=*/::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector, - /*is_parallel=*/::std::true_type); +template +void +__pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _Compare) noexcept; + +template +void +__pattern_nth_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); //------------------------------------------------------------------------ // fill, fill_n //------------------------------------------------------------------------ -template +template struct __brick_fill; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_fill(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, const _Tp&, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; +template +void +__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, const _Tp&) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_fill(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, const _Tp&, - /*is_parallel=*/::std::true_type, _IsVector); +template +_RandomAccessIterator +__pattern_fill(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, const _Tp&); -template +template struct __brick_fill_n; -template +template _OutputIterator -__pattern_fill_n(_ExecutionPolicy&&, _OutputIterator, _Size, const _Tp&, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; +__pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator, _Size, const _Tp&) noexcept; -template +template _RandomAccessIterator -__pattern_fill_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, const _Tp&, - /*is_parallel=*/::std::true_type, _IsVector); +__pattern_fill_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, const _Tp&); //------------------------------------------------------------------------ // generate, generate_n @@ -963,15 +888,14 @@ template void __brick_generate(_ForwardIterator, _ForwardIterator, _Generator, /* is_vector = */ ::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_generate(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Generator, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; +template +void +__pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Generator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_generate(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Generator, - /*is_parallel=*/::std::true_type, _IsVector); +template +_RandomAccessIterator +__pattern_generate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Generator); template _RandomAccessIterator __brick_generate_n(_RandomAccessIterator, Size, _Generator, @@ -981,15 +905,13 @@ template OutputIterator __brick_generate_n(OutputIterator, Size, _Generator, /* is_vector = */ ::std::false_type) noexcept; -template -OutputIterator -__pattern_generate_n(_ExecutionPolicy&&, OutputIterator, Size, _Generator, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; +template +_OutputIterator +__pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator, _Size, _Generator) noexcept; -template +template _RandomAccessIterator -__pattern_generate_n(_ExecutionPolicy&&, _RandomAccessIterator, Size, _Generator, - /*is_parallel=*/::std::true_type, _IsVector); +__pattern_generate_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Generator); //------------------------------------------------------------------------ // remove @@ -1002,15 +924,14 @@ template _RandomAccessIterator __brick_remove_if(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_remove_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, - /*is_parallel*/ ::std::false_type) noexcept; +template +_ForwardIterator +__pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_remove_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallel*/ ::std::true_type); +template +_RandomAccessIterator +__pattern_remove_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); //------------------------------------------------------------------------ // merge @@ -1026,18 +947,17 @@ _OutputIterator __brick_merge(_RandomAccessIterator1, _RandomAccessIterator1, _R _RandomAccessIterator2, _OutputIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_merge(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _OutputIterator, _Compare, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +_OutputIterator +__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_merge(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +_RandomAccessIterator3 +__pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _RandomAccessIterator3, _Compare); //------------------------------------------------------------------------ // inplace_merge @@ -1051,34 +971,30 @@ template void __brick_inplace_merge(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _BidirectionalIterator, - _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +void +__pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _BidirectionalIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +void +__pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); //------------------------------------------------------------------------ // includes //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +bool +__pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +bool +__pattern_includes(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _Compare); //------------------------------------------------------------------------ // set_union @@ -1094,17 +1010,17 @@ _OutputIterator __brick_set_union(_RandomAccessIterator1, _RandomAccessIterator1 _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::true_type); +template +_OutputIterator +__pattern_set_union(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare); //------------------------------------------------------------------------ // set_intersection @@ -1120,19 +1036,18 @@ _OutputIterator __brick_set_intersection(_RandomAccessIterator1, _RandomAccessIt _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator3 +__pattern_set_intersection(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _RandomAccessIterator3, _Compare); //------------------------------------------------------------------------ // set_difference @@ -1148,18 +1063,17 @@ _OutputIterator __brick_set_difference(_RandomAccessIterator1, _RandomAccessIter _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator3 +__pattern_set_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _RandomAccessIterator3, _Compare); //------------------------------------------------------------------------ // set_symmetric_difference @@ -1175,19 +1089,18 @@ _OutputIterator __brick_set_symmetric_difference(_RandomAccessIterator1, _Random _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, - _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator3 +__pattern_set_symmetric_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _RandomAccessIterator3, _Compare); //------------------------------------------------------------------------ // is_heap_until @@ -1201,15 +1114,14 @@ template _RandomAccessIterator __brick_is_heap_until(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +_RandomAccessIterator +__pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +_RandomAccessIterator +__pattern_is_heap_until(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); //------------------------------------------------------------------------ // is_heap @@ -1223,15 +1135,14 @@ template bool __brick_is_heap(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +bool +__pattern_is_heap(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); //------------------------------------------------------------------------ // min_element @@ -1245,15 +1156,14 @@ template _RandomAccessIterator __brick_min_element(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_min_element(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +_ForwardIterator +__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_min_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +_RandomAccessIterator +__pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); //------------------------------------------------------------------------ // minmax_element @@ -1268,17 +1178,14 @@ ::std::pair<_RandomAccessIterator, _RandomAccessIterator> __brick_minmax_element(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator, _ForwardIterator>> -__pattern_minmax_element(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +::std::pair<_ForwardIterator, _ForwardIterator> +__pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator, _RandomAccessIterator>> -__pattern_minmax_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +::std::pair<_RandomAccessIterator, _RandomAccessIterator> +__pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); //------------------------------------------------------------------------ // mismatch @@ -1295,19 +1202,16 @@ ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> _Predicate, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator1, _ForwardIterator2>> -__pattern_mismatch(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _Predicate, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +::std::pair<_ForwardIterator1, _ForwardIterator2> +__pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _Predicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2>> -__pattern_mismatch(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Predicate, _IsVector, /* is_parallel = */ ::std::true_type); +template +::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> +__pattern_mismatch(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _Predicate); //------------------------------------------------------------------------ // lexicographical_compare @@ -1323,24 +1227,21 @@ bool __brick_lexicographical_compare(_RandomAccessIterator1, _RandomAccessIterat _RandomAccessIterator2, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator2, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, - _RandomAccessIterator2, _RandomAccessIterator2, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +bool +__pattern_lexicographical_compare(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_swap(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function, _IsVector, - _IsParallel); +template +_ForwardIterator2 +__pattern_swap(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function); //------------------------------------------------------------------------ // shift_left @@ -1356,23 +1257,24 @@ _ForwardIterator __brick_shift_left(_ForwardIterator, _ForwardIterator, typename ::std::iterator_traits<_ForwardIterator>::difference_type, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, - typename ::std::iterator_traits<_ForwardIterator>::difference_type, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, - typename ::std::iterator_traits<_ForwardIterator>::difference_type, _IsVector, - /*is_parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_shift_right(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, - typename ::std::iterator_traits<_BidirectionalIterator>::difference_type, _IsVector, - _IsParallel is_parallel); +template +_ForwardIterator +__pattern_shift_left(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, + typename ::std::iterator_traits<_ForwardIterator>::difference_type) noexcept; + +template +_RandomAccessIterator +__pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + typename ::std::iterator_traits<_RandomAccessIterator>::difference_type); + +//------------------------------------------------------------------------ +// shift_right +//------------------------------------------------------------------------ + +template +_BidirectionalIterator +__pattern_shift_right(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + typename ::std::iterator_traits<_BidirectionalIterator>::difference_type); } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 628e7ad09b8..7f9db008b45 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -63,23 +63,24 @@ __brick_any_of(const _RandomAccessIterator __first, const _RandomAccessIterator return __unseq_backend::__simd_or(__first, __last - __first, __pred); }; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred, - _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept +template +bool +__pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred) noexcept { - return __internal::__brick_any_of(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_any_of(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Pred __pred, - _IsVector __is_vector, /*parallel=*/::std::true_type) +template +bool +__pattern_any_of(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Pred __pred) { return __internal::__except_handler([&]() { - return __internal::__parallel_or(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __internal::__brick_any_of(__i, __j, __pred, __is_vector); + return __internal::__parallel_or(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__brick_any_of(__i, __j, __pred, _IsVector{}); }); }); } @@ -134,64 +135,66 @@ __brick_walk1(_DifferenceType __n, _Function __f, ::std::true_type) noexcept oneapi::dpl::__internal::__brick_walk1(__n, __f, ::std::false_type{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk1(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::false_type) noexcept +template +void +__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f) noexcept { - __internal::__brick_walk1(__first, __last, __f, __is_vector); + static_assert(__is_serial_tag_v<_Tag>); + + __internal::__brick_walk1(__first, __last, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::true_type) +template +void +__pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Function __f) { + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + + typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; + auto __func = [&__f](_ReferenceType arg) { __f(arg); }; __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__f, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __internal::__brick_walk1(__i, __j, __f, __is_vector); - }); + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __func); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, - !__is_random_access_iterator_v<_ForwardIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f, _IsVector, - /*parallel=*/::std::true_type) +template +void +__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Function __f) { - typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - auto __func = [&__f](_ReferenceType arg) { __f(arg); }; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __func); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__f](_RandomAccessIterator __i, _RandomAccessIterator __j) { + __internal::__brick_walk1(__i, __j, __f, _IsVector{}); + }); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Brick __brick, - /*parallel=*/::std::false_type) noexcept +template +void +__pattern_walk_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Brick __brick) noexcept { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - __brick(__first, __last, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + __brick(__first, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Brick __brick, - /*parallel=*/::std::true_type) +template +void +__pattern_walk_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Brick __brick) { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__brick, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __brick(__i, __j, __is_vector); - }); + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j, _IsVector{}); }); }); } @@ -214,45 +217,45 @@ __brick_walk1_n(_RandomAccessIterator __first, _DifferenceType __n, _Function __ return __unseq_backend::__simd_walk_1(__first, __n, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk1_n(_ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f) { - return __internal::__brick_walk1_n(__first, __n, __f, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_walk1_n(__first, __n, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk1_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Function __f, - _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_walk1_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, + _Function __f) { - oneapi::dpl::__internal::__pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f, - __is_vector, ::std::true_type()); + oneapi::dpl::__internal::__pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, + __f); return __first + __n; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Brick __brick, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_walk_brick_n(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Brick __brick) noexcept { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - return __brick(__first, __n, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick(__first, __n, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Brick __brick, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_walk_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, + _Brick __brick) { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, - [__brick, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __brick(__i, __j - __i, __is_vector); - }); + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, + [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j - __i, _IsVector{}); }); return __first + __n; }); } @@ -299,38 +302,41 @@ __brick_walk2_n(_RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterato return __unseq_backend::__simd_walk_2(__first1, __n, __first2, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _Function __f, _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept +template +_ForwardIterator2 +__pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Function __f) noexcept { - return __internal::__brick_walk2(__first1, __last1, __first2, __f, __is_vector); + static_assert(__is_serial_tag_v<_Tag>); + + return __internal::__brick_walk2(__first1, __last1, __first2, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Function __f) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__f, __first1, __first2, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__f, __first1, __first2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, _IsVector{}); }); return __first2 + (__last1 - __first1); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, _IsVector, /*parallel=*/::std::true_type) +template +_ForwardIterator2 +__pattern_walk2(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) { + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + return __internal::__except_handler([&]() { using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2>; auto __begin = _iterator_tuple(__first1, __first2); @@ -339,7 +345,7 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __begin, __end, + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __begin, __end, [&__f](::std::tuple<_ReferenceType1, _ReferenceType2> __val) { __f(::std::get<0>(__val), ::std::get<1>(__val)); }); @@ -352,51 +358,50 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f, - _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept +template +_ForwardIterator2 +__pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, + _Function __f) noexcept { - return __internal::__brick_walk2_n(__first1, __n, __first2, __f, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_walk2_n(__first1, __n, __first2, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, - _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_walk2_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _Size __n, _RandomAccessIterator2 __first2, _Function __f) { - return __internal::__pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, - __f, __is_vector, ::std::true_type()); + return __internal::__pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __first2, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick, /*parallel=*/::std::false_type) noexcept +template +_ForwardIterator2 +__pattern_walk2_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Brick __brick) noexcept { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(__exec); - return __brick(__first1, __last1, __first2, __is_vector); + static_assert(__is_serial_tag_v<_Tag>); + + return __brick(__first1, __last1, __first2, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_walk2_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Brick __brick) { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>( - __exec); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; return __except_handler([&]() { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [&__is_vector, __first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - __brick(__i, __j, __first2 + (__i - __first1), __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __brick(__i, __j, __first2 + (__i - __first1), _IsVector{}); }); return __first2 + (__last1 - __first1); }); @@ -404,11 +409,12 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1 //TODO: it postponed till adding more or less effective parallel implementation template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) +_ForwardIterator2 +__pattern_walk2_brick(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) { + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2>; auto __begin = _iterator_tuple(__first1, __first2); auto __end = _iterator_tuple(__last1, /*dummy parameter*/ _ForwardIterator2()); @@ -417,7 +423,7 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; return __except_handler([&]() { - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __begin, __end, + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __begin, __end, [__brick](::std::tuple<_ReferenceType1, _ReferenceType2> __val) { __brick(::std::get<0>(__val), ::std::forward<_ReferenceType2>(::std::get<1>(__val))); @@ -431,33 +437,33 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _Size __n, _RandomAccessIterator2 __first2, _Brick __brick) { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>( - __exec); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; return __except_handler([&]() { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, - [&__is_vector, __first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - __brick(__i, __j - __i, __first2 + (__i - __first1), __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __brick(__i, __j - __i, __first2 + (__i - __first1), _IsVector{}); }); return __first2 + __n; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, - _Brick __brick, /*parallel=*/::std::false_type) noexcept +template +_ForwardIterator2 +__pattern_walk2_brick_n(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, + _ForwardIterator2 __first2, _Brick __brick) noexcept { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(__exec); - return __brick(__first1, __n, __first2, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick(__first1, __n, __first2, typename _Tag::__is_vector{}); } //------------------------------------------------------------------------ @@ -483,46 +489,45 @@ __brick_walk3(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ return __unseq_backend::__simd_walk_3(__first1, __last1 - __first1, __first2, __first3, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::false_type) noexcept +template +_ForwardIterator3 +__pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) noexcept { - return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, __is_vector); + static_assert(__is_serial_tag_v<_Tag>); + + return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, - __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>, - _RandomAccessIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::true_type) +template +_RandomAccessIterator3 +__pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, + _Function __f) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__f, __first1, __first2, __first3, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__f, __first1, __first2, __first3](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, - __is_vector); + _IsVector{}); }); return __first3 + (__last1 - __first1); }); } template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2, _ForwardIterator3>, - _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f, _IsVector, - /*parallel=*/::std::true_type) + class _Function> +_ForwardIterator3 +__pattern_walk3(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) { + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + return __internal::__except_handler([&]() { using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2, _ForwardIterator3>; auto __begin = _iterator_tuple(__first1, __first2, __first3); @@ -533,7 +538,7 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; typedef typename ::std::iterator_traits<_ForwardIterator3>::reference _ReferenceType3; - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __begin, __end, + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __begin, __end, [&](::std::tuple<_ReferenceType1, _ReferenceType2, _ReferenceType3> __val) { __f(::std::get<0>(__val), ::std::get<1>(__val), ::std::get<2>(__val)); }); @@ -550,26 +555,28 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI // transform_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __func, _IsVector __is_vector, - _IsParallel __is_parallel) noexcept +template +_ForwardIterator2 +__pattern_walk2_transform_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __func) noexcept { - return __pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __func, __is_vector, - __is_parallel); + static_assert(__is_host_dispatch_tag_v<_Tag>); + + return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __func); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __func, - _IsVector __is_vector, _IsParallel __is_parallel) noexcept +template +_ForwardIterator3 +__pattern_walk3_transform_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, + _Function __func) noexcept { - return __pattern_walk3(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __first3, __func, - __is_vector, __is_parallel); + static_assert(__is_host_dispatch_tag_v<_Tag>); + + return __pattern_walk3(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __first3, + __func); } //------------------------------------------------------------------------ @@ -596,32 +603,32 @@ __brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ .first == __last1; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator2 __last2, _BinaryPredicate __p, _IsVector __is_vector, /* is_parallel = */ - ::std::false_type) noexcept +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __p) noexcept { - return __internal::__brick_equal(__first1, __last1, __first2, __last2, __p, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_equal(__first1, __last1, __first2, __last2, __p, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _BinaryPredicate __p, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +bool +__pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _BinaryPredicate __p) { if (__last1 - __first1 != __last2 - __first2) return false; return __internal::__except_handler([&]() { return !__internal::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - __p, __is_vector); + __p, _IsVector{}); }); }); } @@ -647,27 +654,27 @@ __brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ .first == __last1; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _BinaryPredicate __p, _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _BinaryPredicate __p) noexcept { - return __internal::__brick_equal(__first1, __last1, __first2, __p, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_equal(__first1, __last1, __first2, __p, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _BinaryPredicate __p, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +bool +__pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _BinaryPredicate __p) { return __internal::__except_handler([&]() { return !__internal::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, _IsVector{}); }); }); } @@ -694,27 +701,28 @@ __brick_find_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _Pr [&__pred](_RandomAccessIterator __it, _SizeType __i) { return __pred(__it[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_find_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, - _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_find_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Predicate __pred) noexcept { - return __internal::__brick_find_if(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_find_if(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_find_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Predicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Predicate __pred) { return __except_handler([&]() { - return __parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __brick_find_if(__i, __j, __pred, __is_vector); - }, - ::std::true_type{}); + return __parallel_find( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __brick_find_if(__i, __j, __pred, _IsVector{}); + }, + ::std::true_type{}); }); } @@ -836,40 +844,39 @@ __brick_find_end(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, return __find_subrange(__first, __last, __last, __s_first, __s_last, __pred, false, ::std::true_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_end(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, - _ForwardIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator1 +__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept { - return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_find_end(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator1 +__pattern_find_end(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, + _BinaryPredicate __pred) { if (__last - __first == __s_last - __s_first) { - const bool __res = __internal::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __s_first, __pred, __is_vector, ::std::true_type()); + const bool __res = __internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __s_first, __pred); return __res ? __first : __last; } else { return __internal::__except_handler([&]() { - return __internal::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __s_first, __s_last, __pred, - __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__find_subrange(__i, __j, __last, __s_first, - __s_last, __pred, false, - __is_vector); - }, - ::std::false_type{}); + return __internal::__parallel_find( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, + _IsVector{}); + }, + ::std::false_type{}); }); } } @@ -893,28 +900,29 @@ __brick_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _Forw return __unseq_backend::__simd_find_first_of(__first, __last, __s_first, __s_last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_first_of(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, - _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator1 +__pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept { - return __internal::__brick_find_first_of(__first, __last, __s_first, __s_last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_find_first_of(__first, __last, __s_first, __s_last, __pred, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, - _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator1 +__pattern_find_first_of(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, + _RandomAccessIterator2 __s_last, _BinaryPredicate __pred) { return __internal::__except_handler([&]() { return __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__s_first, __s_last, &__pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { - return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__s_first, __s_last, &__pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, _IsVector{}); }, ::std::true_type{}); }); @@ -939,41 +947,39 @@ __brick_search(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIter return __internal::__find_subrange(__first, __last, __last, __s_first, __s_last, __pred, true, ::std::true_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_search(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, - _ForwardIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator1 +__pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept { - return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_search(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator1 +__pattern_search(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, + _BinaryPredicate __pred) { if (__last - __first == __s_last - __s_first) { - const bool __res = __internal::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __s_first, __pred, __is_vector, ::std::true_type()); + const bool __res = __internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __s_first, __pred); return __res ? __first : __last; } else { return __internal::__except_handler([&]() { - return __internal::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __s_first, __s_last, __pred, - __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__find_subrange(__i, __j, __last, __s_first, - __s_last, __pred, true, - __is_vector); - }, - ::std::true_type{}); + return __internal::__parallel_find( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, + _IsVector{}); + }, + /*_IsFirst=*/::std::true_type{}); }); } } @@ -997,38 +1003,36 @@ __brick_search_n(_RandomAccessIterator __first, _RandomAccessIterator __last, _S return __internal::__find_subrange(__first, __last, __last, __count, __value, __pred, ::std::true_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_search_n(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Size __count, - const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Size __count, + const _Tp& __value, _BinaryPredicate __pred) noexcept { - return __internal::__brick_search_n(__first, __last, __count, __value, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_search_n(__first, __last, __count, __value, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Size __count, const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_search_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { if (static_cast<_Size>(__last - __first) == __count) { - const bool __result = !__internal::__pattern_any_of( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); }, __is_vector, - /*is_parallel*/ ::std::true_type()); + const bool __result = + !__internal::__pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); }); return __result ? __first : __last; } else { - return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred, __is_vector]() { + return __internal::__except_handler([__tag, &__exec, __first, __last, __count, &__value, __pred]() { return __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __count, &__value, __pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __count, &__value, __pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, _IsVector{}); }, ::std::true_type{}); }); @@ -1042,8 +1046,9 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ra // clear that doing so is worth the trouble and extra layers of call chain. // Sometimes a little duplication for sake of regularity is better than the alternative. -template -struct __brick_copy_n<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template +struct __brick_copy_n<_Tag, _ExecutionPolicy, + ::std::enable_if_t>> { template _RandomAccessIterator2 @@ -1067,10 +1072,9 @@ struct __brick_copy_n<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_hos // copy //------------------------------------------------------------------------ -template -struct __brick_copy<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template +struct __brick_copy<_Tag, _ExecutionPolicy, ::std::enable_if_t<__is_host_dispatch_tag_v<_Tag>>> { - template _RandomAccessIterator2 operator()(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, @@ -1100,10 +1104,9 @@ struct __brick_copy<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_ // move //------------------------------------------------------------------------ -template -struct __brick_move<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template +struct __brick_move<_Tag, _ExecutionPolicy, ::std::enable_if_t<__is_host_dispatch_tag_v<_Tag>>> { - template _RandomAccessIterator2 operator()(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, @@ -1120,14 +1123,17 @@ struct __brick_move<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_ { return ::std::move(__first, __last, __result); } -}; -template -struct __brick_move_destroy; + template + void + operator()(_ReferenceType1&& __val, _ReferenceType2&& __result) const + { + __result = ::std::move(__val); + } +}; -template -struct __brick_move_destroy<_ExecutionPolicy, - oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template >> +struct __brick_move_destroy { template _RandomAccessIterator2 @@ -1295,48 +1301,51 @@ __brick_partition_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_copy_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, - _UnaryPredicate __pred, _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept +template +_OutputIterator +__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, + _UnaryPredicate __pred) noexcept { - return __internal::__brick_copy_if(__first, __last, __result, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_copy_if(__first, __last, __result, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_copy_if(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __result, _UnaryPredicate __pred, _IsVector __is_vector, - /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; if (_DifferenceType(1) < __n) { - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); - return __internal::__except_handler([&__exec, __n, __first, __result, __is_vector, __pred, &__mask_buf]() { + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__exec, __n); + return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf]() { bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), - __mask + __i, __pred, __is_vector) + __mask + __i, __pred, _IsVector{}) .first; }, ::std::plus<_DifferenceType>(), // Combine [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan __internal::__brick_copy_by_mask( __first + __i, __first + (__i + __len), __result + __initial, __mask + __i, - [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, __is_vector); + [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{}); }, [&__m](_DifferenceType __total) { __m = __total; }); return __result + __m; }); } // trivial sequence - use serial algorithm - return __internal::__brick_copy_if(__first, __last, __result, __pred, __is_vector); + return __internal::__brick_copy_if(__first, __last, __result, __pred, _IsVector{}); } //------------------------------------------------------------------------ @@ -1358,22 +1367,22 @@ __brick_count(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pr return ::std::count_if(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> -__pattern_count(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, - /* is_parallel */ ::std::false_type, _IsVector __is_vector) noexcept +template +typename ::std::iterator_traits<_ForwardIterator>::difference_type +__pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) noexcept { - return __internal::__brick_count(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_count(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_RandomAccessIterator>::difference_type> -__pattern_count(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Predicate __pred, - /* is_parallel */ ::std::true_type, _IsVector __is_vector) +template +typename ::std::iterator_traits<_RandomAccessIterator>::difference_type +__pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Predicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType; //trivial pre-checks @@ -1382,9 +1391,10 @@ __pattern_count(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rando return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), - [__pred, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end, _SizeType __value) - -> _SizeType { return __value + __internal::__brick_count(__begin, __end, __pred, __is_vector); }, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), + [__pred](_RandomAccessIterator __begin, _RandomAccessIterator __end, _SizeType __value) -> _SizeType { + return __value + __internal::__brick_count(__begin, __end, __pred, _IsVector{}); + }, ::std::plus<_SizeType>()); }); } @@ -1410,32 +1420,36 @@ __brick_unique(_RandomAccessIterator __first, _RandomAccessIterator __last, _Bin return ::std::unique(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_unique(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _BinaryPredicate __pred) noexcept { - return __internal::__brick_unique(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_unique(__first, __last, __pred, typename _Tag::__is_vector{}); } // That function is shared between two algorithms - remove_if (__pattern_remove_if) and unique (pattern unique). But a mask calculation is different. // So, a caller passes _CalcMask brick into remove_elements. -template -_ForwardIterator -__remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _CalcMask __calc_mask, - _IsVector __is_vector) +template +_RandomAccessIterator +__remove_elements(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _CalcMask __calc_mask) { - typedef typename ::std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; - typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _Tp; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; _DifferenceType __n = __last - __first; - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__exec, __n); // 1. find a first iterator that should be removed return __internal::__except_handler([&]() { bool* __mask = __mask_buf.get(); _DifferenceType __min = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, - [__first, __mask, &__calc_mask, __is_vector](_DifferenceType __i, _DifferenceType __j, - _DifferenceType __local_min) -> _DifferenceType { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, + [__first, __mask, &__calc_mask](_DifferenceType __i, _DifferenceType __j, + _DifferenceType __local_min) -> _DifferenceType { // Create mask __calc_mask(__mask + __i, __mask + __j, __first + __i); @@ -1445,8 +1459,8 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI return __local_min; } // find first iterator that should be removed - bool* __result = __internal::__brick_find_if(__mask + __i, __mask + __j, - [](bool __val) { return !__val; }, __is_vector); + bool* __result = __internal::__brick_find_if( + __mask + __i, __mask + __j, [](bool __val) { return !__val; }, _IsVector{}); if (__result - __mask == __j) { return __local_min; @@ -1465,45 +1479,45 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI __n -= __min; __first += __min; - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n); + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__exec, __n); _Tp* __result = __buf.get(); __mask += __min; _DifferenceType __m{}; // 2. Elements that doesn't satisfy pred are moved to result __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), - [__mask, __is_vector](_DifferenceType __i, _DifferenceType __len) { - return __internal::__brick_count(__mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, - __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + [__mask](_DifferenceType __i, _DifferenceType __len) { + return __internal::__brick_count( + __mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, _IsVector{}); }, ::std::plus<_DifferenceType>(), [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { __internal::__brick_copy_by_mask( __first + __i, __first + __i + __len, __result + __initial, __mask + __i, - [](_ForwardIterator __x, _Tp* __z) { + [](_RandomAccessIterator __x, _Tp* __z) { if constexpr (::std::is_trivial_v<_Tp>) *__z = ::std::move(*__x); else ::new (::std::addressof(*__z)) _Tp(::std::move(*__x)); }, - __is_vector); + _IsVector{}); }, [&__m](_DifferenceType __total) { __m = __total; }); // 3. Elements from result are moved to [first, last) - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, - [__result, __first, __is_vector](_Tp* __i, _Tp* __j) { - __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __result), - __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + __m, [__result, __first](_Tp* __i, _Tp* __j) { + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __i, __j, __first + (__i - __result), _IsVector{}); }); return __first + __m; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_unique(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _BinaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_unique(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _BinaryPredicate __pred) { typedef typename ::std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; @@ -1514,16 +1528,15 @@ __pattern_unique(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand if (__first + 1 == __last || __first + 2 == __last) { // Trivial sequence - use serial algorithm - return __internal::__brick_unique(__first, __last, __pred, __is_vector); + return __internal::__brick_unique(__first, __last, __pred, _IsVector{}); } return __internal::__remove_elements( - ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, - [&__pred, __is_vector](bool* __b, bool* __e, _RandomAccessIterator __it) { + __tag, ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, + [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { __internal::__brick_walk3( __b, __e, __it - 1, __it, - [&__pred](bool& __x, _ReferenceType __y, _ReferenceType __z) { __x = !__pred(__y, __z); }, __is_vector); - }, - __is_vector); + [&__pred](bool& __x, _ReferenceType __y, _ReferenceType __z) { __x = !__pred(__y, __z); }, _IsVector{}); + }); } //------------------------------------------------------------------------ @@ -1550,13 +1563,14 @@ __brick_unique_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __las #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_unique_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, - _BinaryPredicate __pred, _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept +template +_OutputIterator +__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __result, _BinaryPredicate __pred) noexcept { - return __internal::__brick_unique_copy(__first, __last, __result, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_unique_copy(__first, __last, __result, __pred, typename _Tag::__is_vector{}); } template @@ -1581,25 +1595,26 @@ __brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, return __unseq_backend::__simd_calc_mask_2(__first, __last - __first, __mask, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __result, _BinaryPredicate __pred, _IsVector __is_vector, - /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _BinaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; if (_DifferenceType(2) < __n) { - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__exec, __n); if (_DifferenceType(2) < __n) { - return __internal::__except_handler([&__exec, __n, __first, __result, __pred, __is_vector, &__mask_buf]() { + return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf]() { bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce _DifferenceType __extra = 0; if (__i == 0) @@ -1612,7 +1627,7 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, ++__extra; } return __internal::__brick_calc_mask_2<_DifferenceType>(__first + __i, __first + (__i + __len), - __mask + __i, __pred, __is_vector) + + __mask + __i, __pred, _IsVector{}) + __extra; }, ::std::plus<_DifferenceType>(), // Combine @@ -1620,7 +1635,7 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, // Phase 2 is same as for __pattern_copy_if __internal::__brick_copy_by_mask( __first + __i, __first + (__i + __len), __result + __initial, __mask + __i, - [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, __is_vector); + [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{}); }, [&__m](_DifferenceType __total) { __m = __total; }); return __result + __m; @@ -1628,7 +1643,7 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, } } // trivial sequence - use serial algorithm - return __internal::__brick_unique_copy(__first, __last, __result, __pred, __is_vector); + return __internal::__brick_unique_copy(__first, __last, __result, __pred, _IsVector{}); } //------------------------------------------------------------------------ @@ -1663,10 +1678,10 @@ void __brick_reverse(_BidirectionalIterator __first, _BidirectionalIterator __last, _BidirectionalIterator __d_last, /*is_vector=*/::std::false_type) noexcept { - for (--__d_last; __first != __last; ++__first, --__d_last) + for (; __first != __last; ++__first) { using ::std::iter_swap; - iter_swap(__first, __d_last); + iter_swap(__first, --__d_last); } } @@ -1685,25 +1700,33 @@ __brick_reverse(_RandomAccessIterator __first, _RandomAccessIterator __last, _Ra }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, - _IsVector _is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +void +__pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last) noexcept { - __internal::__brick_reverse(__first, __last, _is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + __internal::__brick_reverse(__first, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +void +__pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last) { - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, - [__is_vector, __first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { - __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), __is_vector); - }); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + if (__first == __last) + return; + + __internal::__except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, + [__first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { + __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), + _IsVector{}); + }); + }); } //------------------------------------------------------------------------ @@ -1730,28 +1753,37 @@ __brick_reverse_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __la __d_first, [](_ReferenceType1 __x, _ReferenceType2 __y) { __y = __x; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_reverse_copy(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, - _OutputIterator __d_first, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +_OutputIterator +__pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, + _OutputIterator __d_first) noexcept { - return __internal::__brick_reverse_copy(__first, __last, __d_first, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_reverse_copy(__first, __last, __d_first, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_reverse_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __d_first, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + auto __len = __last - __first; - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__is_vector, __first, __len, __d_first](_RandomAccessIterator1 __inner_first, - _RandomAccessIterator1 __inner_last) { - __internal::__brick_reverse_copy(__inner_first, __inner_last, - __d_first + (__len - (__inner_last - __first)), - __is_vector); - }); - return __d_first + __len; + + if (__len == 0) + return __d_first; + + return __internal::__except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __len, __d_first](_RandomAccessIterator1 __inner_first, _RandomAccessIterator1 __inner_last) { + __internal::__brick_reverse_copy(__inner_first, __inner_last, + __d_first + (__len - (__inner_last - __first)), _IsVector{}); + }); + return __d_first + __len; + }); } //------------------------------------------------------------------------ @@ -1806,43 +1838,47 @@ __brick_rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _R return __ret; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_rotate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, + _ForwardIterator __last) noexcept { - return __internal::__brick_rotate(__first, __middle, __last, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_rotate(__first, __middle, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_rotate(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; auto __n = __last - __first; auto __m = __middle - __first; if (__m <= __n / 2) { - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n - __m); - return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() { + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__exec, __n - __m); + return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, &__buf]() { _Tp* __result = __buf.get(); - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, - [__middle, __result, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __middle), __is_vector); - }); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, + [__middle, __result](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_uninitialized_move( + __b, __e, __result + (__b - __middle), _IsVector{}); + }); - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, - [__last, __middle, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_move<_ExecutionPolicy>{}(__b, __e, __b + (__last - __middle), __is_vector); - }); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, + [__last, __middle](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __b, __e, __b + (__last - __middle), _IsVector{}); + }); - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __result, __result + (__n - __m), - [__first, __result, __is_vector](_Tp* __b, _Tp* __e) { - __brick_move_destroy<_ExecutionPolicy>{}( - __b, __e, __first + (__b - __result), __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + (__n - __m), [__first, __result](_Tp* __b, _Tp* __e) { + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __b, __e, __first + (__b - __result), _IsVector{}); }); return __first + (__last - __middle); @@ -1850,25 +1886,25 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand } else { - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__m); - return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() { + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__exec, __m); + return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, &__buf]() { _Tp* __result = __buf.get(); - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, - [__first, __result, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __first), __is_vector); - }); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, + [__first, __result](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_uninitialized_move( + __b, __e, __result + (__b - __first), _IsVector{}); + }); - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, - [__first, __middle, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_move<_ExecutionPolicy>{}(__b, __e, __first + (__b - __middle), __is_vector); - }); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, + [__first, __middle](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __b, __e, __first + (__b - __middle), _IsVector{}); + }); - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, - [__n, __m, __first, __result, __is_vector](_Tp* __b, _Tp* __e) { - __brick_move_destroy<_ExecutionPolicy>{}( - __b, __e, __first + ((__n - __m) + (__b - __result)), __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + __m, [__n, __m, __first, __result](_Tp* __b, _Tp* __e) { + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __b, __e, __first + ((__n - __m) + (__b - __result)), _IsVector{}); }); return __first + (__last - __middle); @@ -1880,63 +1916,70 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand // rotate_copy //------------------------------------------------------------------------ -template +template _OutputIterator -__brick_rotate_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, - _OutputIterator __result, /*__is_vector=*/::std::false_type) noexcept +__brick_rotate_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, + _ForwardIterator __last, _OutputIterator __result) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + return ::std::rotate_copy(__first, __middle, __last, __result); } -template +template _RandomAccessIterator2 -__brick_rotate_copy(_ExecutionPolicy&&, _RandomAccessIterator1 __first, _RandomAccessIterator1 __middle, - _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, - /*__is_vector=*/::std::true_type) noexcept +__brick_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __middle, _RandomAccessIterator1 __last, + _RandomAccessIterator2 __result) noexcept { - _RandomAccessIterator2 __res = __brick_copy<_ExecutionPolicy>{}(__middle, __last, __result, ::std::true_type()); - return __internal::__brick_copy<_ExecutionPolicy>{}(__first, __middle, __res, ::std::true_type()); + _RandomAccessIterator2 __res = + __brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}(__middle, __last, __result); + return __internal::__brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}(__first, __middle, __res); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, - _ForwardIterator __last, _OutputIterator __result, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_OutputIterator +__pattern_rotate_copy(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, + _ForwardIterator __last, _OutputIterator __result) noexcept { - return __internal::__brick_rotate_copy(::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, - __result, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_rotate_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, + __result); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_rotate_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __middle, - _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __middle, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result) { - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __last, __middle, __result, __is_vector](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { - __internal::__brick_copy<_ExecutionPolicy> __copy{}; - if (__b > __middle) - { - __copy(__b, __e, __result + (__b - __middle), __is_vector); - } - else - { - _RandomAccessIterator2 __new_result = __result + ((__last - __middle) + (__b - __first)); - if (__e < __middle) + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + return __internal::__except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __last, __middle, __result](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { + __internal::__brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy> __copy{}; + if (__b > __middle) { - __copy(__b, __e, __new_result, __is_vector); + __copy(__b, __e, __result + (__b - __middle), _IsVector{}); } else { - __copy(__b, __middle, __new_result, __is_vector); - __copy(__middle, __e, __result, __is_vector); + _RandomAccessIterator2 __new_result = __result + ((__last - __middle) + (__b - __first)); + if (__e < __middle) + { + __copy(__b, __e, __new_result, _IsVector{}); + } + else + { + __copy(__b, __middle, __new_result, _IsVector{}); + __copy(__middle, __e, __result, _IsVector{}); + } } - } - }); - return __result + (__last - __first); + }); + return __result + (__last - __first); + }); } //------------------------------------------------------------------------ @@ -1978,19 +2021,23 @@ __brick_is_partitioned(_RandomAccessIterator __first, _RandomAccessIterator __la } } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +bool +__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept { - return __internal::__brick_is_partitioned(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_is_partitioned(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +bool +__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + //trivial pre-checks if (__first == __last) return true; @@ -2028,9 +2075,9 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs const _ReduceType __identity{__not_init, __last}; _ReduceType __result = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, - [&__pred, __combine, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j, - _ReduceType __value) -> _ReduceType { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, + [&__pred, __combine](_RandomAccessIterator __i, _RandomAccessIterator __j, + _ReduceType __value) -> _ReduceType { if (__value.__val == __broken) return _ReduceType{__broken, __i}; @@ -2040,11 +2087,11 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs { // find first element that don't satisfy pred _RandomAccessIterator __x = - __internal::__brick_find_if(__i + 1, __j, __not_pred<_UnaryPredicate&>(__pred), __is_vector); + __internal::__brick_find_if(__i + 1, __j, __not_pred<_UnaryPredicate&>(__pred), _IsVector{}); if (__x != __j) { // find first element after "x" that satisfy pred - _RandomAccessIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, __is_vector); + _RandomAccessIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, _IsVector{}); // if it was found then range isn't partitioned by pred if (__y != __j) return _ReduceType{__broken, __i}; @@ -2058,7 +2105,7 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs { // if first element doesn't satisfy pred // then we should find the first element that satisfy pred. // If we found it then range isn't partitioned by pred - if (__internal::__brick_find_if(__i + 1, __j, __pred, __is_vector) != __j) + if (__internal::__brick_find_if(__i + 1, __j, __pred, _IsVector{}) != __j) return _ReduceType{__broken, __i}; __res = _ReduceType{__all_false, __i}; @@ -2104,19 +2151,22 @@ __brick_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _ return ::std::partition(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_partition(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept { - return __internal::__brick_partition(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_partition(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; // partitioned range: elements before pivot satisfy pred (true part), // elements after pivot don't satisfy pred (false part) @@ -2131,7 +2181,7 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R _PartitionRange __init{__last, __last, __last}; // lambda for merging two partitioned ranges to one partitioned range - auto __reductor = [&__exec, __is_vector](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { + auto __reductor = [&__exec](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { auto __size1 = __val1.__end - __val1.__pivot; auto __size2 = __val2.__pivot - __val2.__begin; auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); @@ -2146,10 +2196,10 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R else if (__size2 > __size1) { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, - [__val1, __val2, __size1, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, + [__val1, __val2, __size1](_RandomAccessIterator __i, _RandomAccessIterator __j) { __internal::__brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot), - __is_vector); + _IsVector{}); }); return {__new_begin, __val2.__pivot - __size1, __val2.__end}; } @@ -2157,20 +2207,20 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R else { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, - [__val1, __val2, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, + [__val1, __val2](_RandomAccessIterator __i, _RandomAccessIterator __j) { + __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), _IsVector{}); }); return {__new_begin, __val1.__pivot + __size2, __val2.__end}; } }; _PartitionRange __result = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, - [__pred, __is_vector, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, - _PartitionRange __value) -> _PartitionRange { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + [__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, + _PartitionRange __value) -> _PartitionRange { //1. serial partition - _RandomAccessIterator __pivot = __internal::__brick_partition(__i, __j, __pred, __is_vector); + _RandomAccessIterator __pivot = __internal::__brick_partition(__i, __j, __pred, _IsVector{}); // 2. merging of two ranges (left and right respectively) return __reductor(__value, {__i, __pivot, __j}); @@ -2201,21 +2251,23 @@ __brick_stable_partition(_RandomAccessIterator __first, _RandomAccessIterator __ return ::std::stable_partition(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_stable_partition(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, - /*is_parallelization=*/::std::false_type) noexcept +template +_BidirectionalIterator +__pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, + _UnaryPredicate __pred) noexcept { - return __internal::__brick_stable_partition(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_stable_partition(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, - /*is_parallelization=*/::std::true_type) +template +_RandomAccessIterator +__pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + // partitioned range: elements before pivot satisfy pred (true part), // elements after pivot don't satisfy pred (false part) struct _PartitionRange @@ -2229,7 +2281,7 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __fi _PartitionRange __init{__last, __last, __last}; // lambda for merging two partitioned ranges to one partitioned range - auto __reductor = [__is_vector](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { + auto __reductor = [](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { auto __size1 = __val1.__end - __val1.__pivot; auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); @@ -2242,17 +2294,17 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __fi // then we should swap the false part of left range and last part of true part of right range else { - __internal::__brick_rotate(__val1.__pivot, __val2.__begin, __val2.__pivot, __is_vector); + __internal::__brick_rotate(__val1.__pivot, __val2.__begin, __val2.__pivot, _IsVector{}); return {__new_begin, __val2.__pivot - __size1, __val2.__end}; } }; _PartitionRange __result = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, - [&__pred, __is_vector, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, - _PartitionRange __value) -> _PartitionRange { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + [&__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, + _PartitionRange __value) -> _PartitionRange { //1. serial stable_partition - _RandomAccessIterator __pivot = __internal::__brick_stable_partition(__i, __j, __pred, __is_vector); + _RandomAccessIterator __pivot = __internal::__brick_stable_partition(__i, __j, __pred, _IsVector{}); // 2. merging of two ranges (left and right respectively) return __reductor(__value, {__i, __pivot, __j}); @@ -2288,40 +2340,42 @@ __brick_partition_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __ #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_OutputIterator1, _OutputIterator2>> -__pattern_partition_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator1 __out_true, _OutputIterator2 __out_false, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallelization=*/::std::false_type) noexcept +template +::std::pair<_OutputIterator1, _OutputIterator2> +__pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator1 __out_true, _OutputIterator2 __out_false, _UnaryPredicate __pred) noexcept { - return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator2, _RandomAccessIterator3>> -__pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __out_true, _RandomAccessIterator3 __out_false, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallelization=*/::std::true_type) +template +::std::pair<_RandomAccessIterator2, _RandomAccessIterator3> +__pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __out_true, + _RandomAccessIterator3 __out_false, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; typedef ::std::pair<_DifferenceType, _DifferenceType> _ReturnType; const _DifferenceType __n = __last - __first; if (_DifferenceType(1) < __n) { - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); - return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __is_vector, __pred, - &__mask_buf]() { + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__exec, __n); + return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __pred, &__mask_buf]() { bool* __mask = __mask_buf.get(); _ReturnType __m{}; __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n, ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), - __mask + __i, __pred, __is_vector); + __mask + __i, __pred, _IsVector{}); }, [](const _ReturnType& __x, const _ReturnType& __y) -> _ReturnType { return ::std::make_pair(__x.first + __y.first, __x.second + __y.second); @@ -2329,39 +2383,45 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir [=](_DifferenceType __i, _DifferenceType __len, _ReturnType __initial) { // Scan __internal::__brick_partition_by_mask(__first + __i, __first + (__i + __len), __out_true + __initial.first, __out_false + __initial.second, - __mask + __i, __is_vector); + __mask + __i, _IsVector{}); }, [&__m](_ReturnType __total) { __m = __total; }); return ::std::make_pair(__out_true + __m.first, __out_false + __m.second); }); } // trivial sequence - use serial algorithm - return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __is_vector); + return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, _IsVector{}); } //------------------------------------------------------------------------ // sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector /*is_vector*/, /*is_parallel=*/::std::false_type, _IsMoveConstructible) noexcept +template +void +__pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + _IsMoveConstructible) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + ::std::sort(__first, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type, /*is_move_constructible=*/::std::true_type) +template +void +__pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp, + /*is_move_constructible=*/::std::true_type) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [](_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { ::std::sort(__first, __last, __comp); }, - __last - __first); + __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + ::std::sort(__first, __last, __comp); + }, + __last - __first); }); } @@ -2369,24 +2429,30 @@ __pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Random // stable_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector /*is_vector*/, /*is_parallel=*/::std::false_type) noexcept +template +void +__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + ::std::stable_sort(__first, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type) +template +void +__pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [](_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { ::std::stable_sort(__first, __last, __comp); }, - __last - __first); + __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + ::std::stable_sort(__first, __last, __comp); + }, + __last - __first); }); } @@ -2394,41 +2460,47 @@ __pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, // sort_by_key //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::false_type) noexcept +template +void +__pattern_sort_by_key(_Tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, + _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, + _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); auto __end = __beg + (__keys_last - __keys_first); - auto __cmp_f = - [__comp](const auto& __a, const auto& __b) { return __comp(::std::get<0>(__a), ::std::get<0>(__b)); }; + auto __cmp_f = [__comp](const auto& __a, const auto& __b) { + return __comp(::std::get<0>(__a), ::std::get<0>(__b)); + }; ::std::sort(__beg, __end, __cmp_f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::true_type) +template +void +__pattern_sort_by_key(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, + _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp) { - static_assert(::std::is_move_constructible_v::value_type> - && ::std::is_move_constructible_v::value_type>, + static_assert( + ::std::is_move_constructible_v::value_type> && + ::std::is_move_constructible_v::value_type>, "The keys and values should be move constructible in case of parallel execution."); auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); auto __end = __beg + (__keys_last - __keys_first); - auto __cmp_f = - [__comp](const auto& __a, const auto& __b) { return __comp(::std::get<0>(__a), ::std::get<0>(__b)); }; + auto __cmp_f = [__comp](const auto& __a, const auto& __b) { + return __comp(::std::get<0>(__a), ::std::get<0>(__b)); + }; + + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, - [](auto __first, auto __last, auto __cmp) - { ::std::sort(__first, __last, __cmp); },__end - __beg); + __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, + [](auto __first, auto __last, auto __cmp) { ::std::sort(__first, __last, __cmp); }, __end - __beg); }); } @@ -2436,27 +2508,30 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_f // partial_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept +template +void +__pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __middle, + _RandomAccessIterator __last, _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + ::std::partial_sort(__first, __middle, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _Compare __comp, _IsVector, /*is_parallel=*/::std::true_type) +template +void +__pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + const auto __n = __middle - __first; if (__n == 0) return; __except_handler([&]() { __par_backend::__parallel_stable_sort( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) { if (__n < __end - __begin) ::std::partial_sort(__begin, __begin + __n, __end, __comp); @@ -2471,22 +2546,25 @@ __pattern_partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, // partial_sort_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_partial_sort_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept +template +_RandomAccessIterator +__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + return ::std::partial_sort_copy(__first, __last, __d_first, __d_last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __d_first, _RandomAccessIterator2 __d_last, _Compare __comp, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_partial_sort_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, + _RandomAccessIterator2 __d_last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + if (__last == __first || __d_last == __d_first) { return __d_first; @@ -2497,14 +2575,13 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ if (__n2 >= __n1) { __par_backend::__parallel_stable_sort( - ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, - [__first, __d_first, __is_vector](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, - _Compare __comp) { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, + [__first, __d_first](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, _Compare __comp) { _RandomAccessIterator1 __i1 = __first + (__i - __d_first); _RandomAccessIterator1 __j1 = __first + (__j - __d_first); // 1. Copy elements from input to output - __brick_copy<_ExecutionPolicy>{}(__i1, __j1, __i, __is_vector); + __brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}(__i1, __j1, __i, _IsVector{}); // 2. Sort elements in output sequence ::std::sort(__i, __j, __comp); }, @@ -2515,38 +2592,39 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ { typedef typename ::std::iterator_traits<_RandomAccessIterator1>::value_type _T1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::value_type _T2; - __par_backend::__buffer<_ExecutionPolicy, _T1> __buf(__n1); + __par_backend::__buffer<_ExecutionPolicy, _T1> __buf(__exec, __n1); _T1* __r = __buf.get(); - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, - [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { - _RandomAccessIterator1 __it = __first + (__i - __r); + __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, + [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { + _RandomAccessIterator1 __it = __first + (__i - __r); - // 1. Copy elements from input to raw memory - for (_T1* __k = __i; __k != __j; ++__k, ++__it) - { - ::new (__k) _T2(*__it); - } + // 1. Copy elements from input to raw memory + for (_T1* __k = __i; __k != __j; ++__k, ++__it) + { + ::new (__k) _T2(*__it); + } - // 2. Sort elements in temporary buffer - if (__n2 < __j - __i) - ::std::partial_sort(__i, __i + __n2, __j, __comp); - else - ::std::sort(__i, __j, __comp); - }, - __n2); + // 2. Sort elements in temporary buffer + if (__n2 < __j - __i) + ::std::partial_sort(__i, __i + __n2, __j, __comp); + else + ::std::sort(__i, __j, __comp); + }, + __n2); // 3. Move elements from temporary buffer to output - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, - [__r, __d_first, __is_vector](_T1* __i, _T1* __j) { - __brick_move_destroy<_ExecutionPolicy>{}( - __i, __j, __d_first + (__i - __r), __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, + [__r, __d_first](_T1* __i, _T1* __j) { + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __i, __j, __d_first + (__i - __r), _IsVector{}); }); if constexpr (!::std::is_trivially_destructible_v<_T1>) - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __r + __n2, __r + __n1, - [__is_vector](_T1* __i, _T1* __j) { __brick_destroy(__i, __j, __is_vector); }); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r + __n2, + __r + __n1, + [](_T1* __i, _T1* __j) { __brick_destroy(__i, __j, _IsVector{}); }); return __d_first + __n2; } @@ -2572,28 +2650,31 @@ __brick_adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _Binary return ::std::adjacent_find(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_adjacent_find(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred, - /* is_parallel */ ::std::false_type, _IsVector __is_vector, _Semantic) noexcept +template +_ForwardIterator +__pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _BinaryPredicate __pred, _Semantic) noexcept { - return __internal::__brick_adjacent_find(__first, __last, __pred, __is_vector, _Semantic::value); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_adjacent_find(__first, __last, __pred, typename _Tag::__is_vector{}, _Semantic::value); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _BinaryPredicate __pred, /* is_parallel */ ::std::true_type, _IsVector __is_vector, - _Semantic __or_semantic) +template +_RandomAccessIterator +__pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _BinaryPredicate __pred, _Semantic __or_semantic) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + if (__last - __first < 2) return __last; return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, - [__last, __pred, __is_vector, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, - _RandomAccessIterator __value) -> _RandomAccessIterator { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, + [__last, __pred, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, + _RandomAccessIterator __value) -> _RandomAccessIterator { // TODO: investigate performance benefits from the use of shared variable for the result, // checking (compare_and_swap idiom) its __value at __first. if (__or_semantic && __value < __last) @@ -2612,7 +2693,7 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first //correct the global result iterator if the "brick" returns a local "__last" const _RandomAccessIterator __res = - __internal::__brick_adjacent_find(__begin, __end, __pred, __is_vector, __or_semantic); + __internal::__brick_adjacent_find(__begin, __end, __pred, _IsVector{}, __or_semantic); if (__res < __end) __value = __res; } @@ -2629,20 +2710,20 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first // nth_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __nth, - _RandomAccessIterator __last, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept +template +void +__pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __nth, + _RandomAccessIterator __last, _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + ::std::nth_element(__first, __nth, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth, - _RandomAccessIterator __last, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +void +__pattern_nth_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) { if (__first == __last || __nth == __last) { @@ -2654,10 +2735,8 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __x; do { - __x = __internal::__pattern_partition(::std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, - [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }, - __is_vector, - /*is_parallel=*/::std::true_type()); + __x = __internal::__pattern_partition(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, + [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }); --__x; if (__x != __first) { @@ -2685,8 +2764,8 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, //------------------------------------------------------------------------ // fill, fill_n //------------------------------------------------------------------------ -template -struct __brick_fill<_Tp, _ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template +struct __brick_fill<_Tag, _ExecutionPolicy, _Tp, ::std::enable_if_t<__is_host_dispatch_tag_v<_Tag>>> { const _Tp& __value; @@ -2707,33 +2786,34 @@ struct __brick_fill<_Tp, _ExecutionPolicy, oneapi::dpl::__internal::__enable_if_ } }; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_fill(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept +template +void +__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) noexcept { - __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__first, __last, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + __internal::__brick_fill<_Tag, _ExecutionPolicy, _Tp>{__value}(__first, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_fill(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - const _Tp& __value, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) +template +_RandomAccessIterator +__pattern_fill(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, const _Tp& __value) { - return __internal::__except_handler([&__exec, __first, __last, &__value, __is_vector]() { - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__value, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end) { - __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__begin, __end, __is_vector); - }); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + return __internal::__except_handler([&__exec, __first, __last, &__value]() { + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value](_RandomAccessIterator __begin, _RandomAccessIterator __end) { + __internal::__brick_fill<__parallel_tag<_IsVector>, _ExecutionPolicy, _Tp>{ + __value}(__begin, __end, _IsVector{}); + }); return __last; }); } -template -struct __brick_fill_n<_Tp, _ExecutionPolicy, - oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template +struct __brick_fill_n<_Tag, _ExecutionPolicy, _Tp, ::std::enable_if_t<__is_host_dispatch_tag_v<_Tag>>> { const _Tp& __value; @@ -2754,21 +2834,23 @@ struct __brick_fill_n<_Tp, _ExecutionPolicy, } }; -template +template _OutputIterator -__pattern_fill_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, const _Tp& __value, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept +__pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, const _Tp& __value) noexcept { - return __internal::__brick_fill_n<_Tp, _ExecutionPolicy>{__value}(__first, __count, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_fill_n<_Tag, _ExecutionPolicy, _Tp>{__value}(__first, __count, + typename _Tag::__is_vector{}); } -template +template _RandomAccessIterator -__pattern_fill_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __count, const _Tp& __value, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) +__pattern_fill_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _Size __count, const _Tp& __value) { - return __internal::__pattern_fill(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __value, - ::std::true_type(), __is_vector); + return __internal::__pattern_fill(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, + __value); } //------------------------------------------------------------------------ @@ -2790,24 +2872,26 @@ __brick_generate(_ForwardIterator __first, _ForwardIterator __last, _Generator _ ::std::generate(__first, __last, __g); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_generate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Generator __g, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept +template +void +__pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Generator __g) noexcept { - __internal::__brick_generate(__first, __last, __g, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + __internal::__brick_generate(__first, __last, __g, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_generate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Generator __g, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) +template +_RandomAccessIterator +__pattern_generate(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Generator __g) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__g, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end) { - __internal::__brick_generate(__begin, __end, __g, __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__g](_RandomAccessIterator __begin, _RandomAccessIterator __end) { + __internal::__brick_generate(__begin, __end, __g, _IsVector{}); }); return __last; }); @@ -2828,23 +2912,24 @@ __brick_generate_n(OutputIterator __first, Size __count, _Generator __g, /* is_v return ::std::generate_n(__first, __count, __g); } -template +template _OutputIterator -__pattern_generate_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, _Generator __g, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept +__pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, _Generator __g) noexcept { - return __internal::__brick_generate_n(__first, __count, __g, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_generate_n(__first, __count, __g, typename _Tag::__is_vector{}); } -template +template _RandomAccessIterator -__pattern_generate_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __count, _Generator __g, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) +__pattern_generate_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _Size __count, _Generator __g) { static_assert(__is_random_access_iterator_v<_RandomAccessIterator>, "Pattern-brick error. Should be a random access iterator."); - return __internal::__pattern_generate(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __g, - ::std::true_type(), __is_vector); + return __internal::__pattern_generate(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, + __g); } //------------------------------------------------------------------------ @@ -2871,34 +2956,35 @@ __brick_remove_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _ #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_remove_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallel*/ ::std::false_type) noexcept +template +_ForwardIterator +__pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept { - return __internal::__brick_remove_if(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_remove_if(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_remove_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel*/ ::std::true_type) +template +_RandomAccessIterator +__pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) { typedef typename ::std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; if (__first == __last || __first + 1 == __last) { // Trivial sequence - use serial algorithm - return __internal::__brick_remove_if(__first, __last, __pred, __is_vector); + return __internal::__brick_remove_if(__first, __last, __pred, _IsVector{}); } return __internal::__remove_elements( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__pred, __is_vector](bool* __b, bool* __e, _RandomAccessIterator __it) { - __internal::__brick_walk2(__b, __e, __it, [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); }, - __is_vector); - }, - __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { + __internal::__brick_walk2( + __b, __e, __it, [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); }, _IsVector{}); + }); } //------------------------------------------------------------------------ @@ -2924,30 +3010,38 @@ __brick_merge(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ return ::std::merge(__first1, __last1, __first2, __last2, __d_first, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_merge(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator2 __last2, _OutputIterator __d_first, _Compare __comp, _IsVector __is_vector, - /* is_parallel = */ ::std::false_type) noexcept +template +_OutputIterator +__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __d_first, + _Compare __comp) noexcept { - return __internal::__brick_merge(__first1, __last1, __first2, __last2, __d_first, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_merge(__first1, __last1, __first2, __last2, __d_first, __comp, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __d_first, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +_RandomAccessIterator3 +__pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __d_first, _Compare __comp) { - __par_backend::__parallel_merge( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, - [__is_vector](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, - _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, _Compare __comp) { - return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, __is_vector); - }); - return __d_first + (__last1 - __first1) + (__last2 - __first2); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + return __internal::__except_handler([&]() { + __par_backend::__parallel_merge( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, + __comp, + [](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, + _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, _Compare __comp) { + return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, _IsVector{}); + }); + return __d_first + (__last1 - __first1) + (__last2 - __first2); + }); } //------------------------------------------------------------------------ @@ -2970,28 +3064,31 @@ __brick_inplace_merge(_RandomAccessIterator __first, _RandomAccessIterator __mid ::std::inplace_merge(__first, __middle, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __middle, - _BidirectionalIterator __last, _Compare __comp, _IsVector __is_vector, - /* is_parallel = */ ::std::false_type) noexcept +template +void +__pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __middle, + _BidirectionalIterator __last, _Compare __comp) noexcept { - __internal::__brick_inplace_merge(__first, __middle, __last, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + __internal::__brick_inplace_merge(__first, __middle, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +void +__pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + if (__first == __last || __first == __middle || __middle == __last) { return; } + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; auto __n = __last - __first; - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n); + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__exec, __n); _Tp* __r = __buf.get(); __internal::__except_handler([&]() { auto __move_values = [](_RandomAccessIterator __x, _Tp* __z) { @@ -3002,11 +3099,11 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first }; auto __move_sequences = [](_RandomAccessIterator __first1, _RandomAccessIterator __last1, _Tp* __first2) { - return __internal::__brick_uninitialized_move(__first1, __last1, __first2, _IsVector()); + return __internal::__brick_uninitialized_move(__first1, __last1, __first2, _IsVector{}); }; __par_backend::__parallel_merge( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, [__n, __move_values, __move_sequences](_RandomAccessIterator __f1, _RandomAccessIterator __l1, _RandomAccessIterator __f2, _RandomAccessIterator __l2, _Tp* __f3, _Compare __comp) { @@ -3014,10 +3111,11 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first __move_sequences, __move_sequences); return __f3 + (__l1 - __f1) + (__l2 - __f2); }); - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, [__r, __first, __is_vector](_Tp* __i, _Tp* __j) { - __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __r), __is_vector); - }); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, + [__r, __first](_Tp* __i, _Tp* __j) { + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __i, __j, __first + (__i - __r), _IsVector{}); + }); }); } @@ -3025,21 +3123,22 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first // includes //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept +template +bool +__pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + return ::std::includes(__first1, __last1, __first2, __last2, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Compare __comp, _IsVector, - /*is_parallel=*/::std::true_type) +template +bool +__pattern_includes(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Compare __comp) { if (__first2 == __last2) return true; @@ -3061,14 +3160,14 @@ __pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ return __internal::__except_handler([&]() { return !__internal::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) { assert(__j > __i); //assert(__j - __i > 1); //1. moving boundaries to "consume" subsequence of equal elements auto __is_equal_sorted = [&__comp](_RandomAccessIterator2 __a, _RandomAccessIterator2 __b) -> bool { - //enough one call of __comp due to compared couple belongs to one sorted sequience + //enough one call of __comp due to compared couple belongs to one sorted sequence return !__comp(*__a, *__b); }; @@ -3098,14 +3197,16 @@ __pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ inline constexpr auto __set_algo_cut_off = 1000; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _SizeFunction __size_func, _SetOP __set_op, _IsVector __is_vector) +template +_OutputIterator +__parallel_set_op(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _OutputIterator __result, _Compare __comp, _SizeFunction __size_func, _SetOP __set_op) { - typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; typedef typename ::std::iterator_traits<_OutputIterator>::value_type _T; struct _SetRange @@ -3121,23 +3222,23 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar const _DifferenceType __n1 = __last1 - __first1; const _DifferenceType __n2 = __last2 - __first2; - __par_backend::__buffer<_ExecutionPolicy, _T> __buf(__size_func(__n1, __n2)); + __par_backend::__buffer<_ExecutionPolicy, _T> __buf(__exec, __size_func(__n1, __n2)); - return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, __is_vector, - __comp, __size_func, __set_op, &__buf]() { + return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, __comp, + __size_func, __set_op, &__buf]() { auto __tmp_memory = __buf.get(); _DifferenceType __m{}; auto __scan = [=](_DifferenceType, _DifferenceType, const _SetRange& __s) { // Scan if (!__s.empty()) - __brick_move_destroy<_ExecutionPolicy>{}(__tmp_memory + __s.__buf_pos, - __tmp_memory + (__s.__buf_pos + __s.__len), - __result + __s.__pos, __is_vector); + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __tmp_memory + __s.__buf_pos, __tmp_memory + (__s.__buf_pos + __s.__len), __result + __s.__pos, + _IsVector{}); }; __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, - [=](_DifferenceType __i, _DifferenceType __len) { // Reduce + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, + [=](_DifferenceType __i, _DifferenceType __len) { // Reduce //[__b; __e) - a subrange of the first sequence, to reduce - _ForwardIterator1 __b = __first1 + __i, __e = __first1 + (__i + __len); + _RandomAccessIterator1 __b = __first1 + __i, __e = __first1 + (__i + __len); //try searching for the first element which not equal to *__b if (__b != __first1) @@ -3150,7 +3251,7 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar //check is [__b; __e) empty if (__e - __b < 1) { - _ForwardIterator2 __bb = __last2; + _RandomAccessIterator2 __bb = __last2; if (__b != __last1) __bb = ::std::lower_bound(__first2, __last2, *__b, __comp); @@ -3159,11 +3260,11 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar } //try searching for "corresponding" subrange [__bb; __ee) in the second sequence - _ForwardIterator2 __bb = __first2; + _RandomAccessIterator2 __bb = __first2; if (__b != __first1) __bb = ::std::lower_bound(__first2, __last2, *__b, __comp); - _ForwardIterator2 __ee = __last2; + _RandomAccessIterator2 __ee = __last2; if (__e != __last1) __ee = ::std::lower_bound(__bb, __last2, *__e, __comp); @@ -3189,64 +3290,66 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar } //a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference' -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _SetUnionOp __set_union_op, _IsVector __is_vector) +template +_OutputIterator +__parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _OutputIterator __result, _Compare __comp, _SetUnionOp __set_union_op) { - typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; - __brick_copy<_ExecutionPolicy> __copy_range{}; + __brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy> __copy_range{}; // {1} {}: parallel copying just first sequence if (__n2 == 0) - return __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __copy_range, ::std::true_type()); + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __copy_range); // {} {2}: parallel copying justmake second sequence if (__n1 == 0) - return __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, __result, - __copy_range, ::std::true_type()); + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result, __copy_range); // testing whether the sequences are intersected - _ForwardIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); + _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); if (__left_bound_seq_1 == __last1) { //{1} < {2}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __copy_range); }, [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, - __result + __n1, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result + __n1, __copy_range); }); return __result + __n1 + __n2; } // testing whether the sequences are intersected - _ForwardIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); + _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); if (__left_bound_seq_2 == __last2) { //{2} < {1}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, __result, - __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result, __copy_range); }, [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - __result + __n2, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result + __n2, __copy_range); }); return __result + __n1 + __n2; } @@ -3257,17 +3360,17 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ auto __res_or = __result; __result += __m1; //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), //do parallel copying of [first1; left_bound_seq_1) [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, - __left_bound_seq_1, __res_or, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __left_bound_seq_1, __res_or, __copy_range); }, [=, &__result] { __result = __internal::__parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, - __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op); }); return __result; } @@ -3279,24 +3382,24 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ auto __res_or = __result; __result += __m2; //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), //do parallel copying of [first2; left_bound_seq_2) [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, - __left_bound_seq_2, __res_or, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, + __left_bound_seq_2, __res_or, __copy_range); }, [=, &__result] { __result = __internal::__parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, - __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op); }); return __result; } return __internal::__parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); } //------------------------------------------------------------------------ @@ -3333,25 +3436,26 @@ __brick_set_union(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last return ::std::set_union(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_OutputIterator +__pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept { - return __internal::__brick_set_union(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_set_union(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*__is_parallel=*/::std::true_type) +template +_OutputIterator +__pattern_set_union(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _OutputIterator __result, _Compare __comp) { - const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -3361,13 +3465,12 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, typedef typename ::std::iterator_traits<_OutputIterator>::value_type _Tp; return __parallel_set_union_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_union_construct(__first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - __is_vector); + }); } //------------------------------------------------------------------------ @@ -3394,23 +3497,25 @@ __brick_set_intersection(_RandomAccessIterator1 __first1, _RandomAccessIterator1 return ::std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, +template +_OutputIterator +__pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept + _Compare __comp) noexcept { - return __internal::__brick_set_intersection(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_set_intersection(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, - _RandomAccessIterator3 __result, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator3 +__pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) { typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; @@ -3438,31 +3543,33 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f if (__m1 > __set_algo_cut_off) { //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) - return __internal::__parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, - __result, __comp); - }, - __is_vector); + return __internal::__except_handler([&]() { + return __internal::__parallel_set_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, + __result, __comp); + }); + }); } const auto __m2 = __last2 - __left_bound_seq_2 + __n1; if (__m2 > __set_algo_cut_off) { //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) - __result = __internal::__parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, - __result, __comp); - }, - __is_vector); - return __result; + return __internal::__except_handler([&]() { + __result = __internal::__parallel_set_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, + __result, __comp); + }); + return __result; + }); } // [left_bound_seq_1; last1) and [left_bound_seq_2; last2) - use serial algorithm @@ -3492,23 +3599,25 @@ __brick_set_difference(_RandomAccessIterator1 __first1, _RandomAccessIterator1 _ return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, +template +_OutputIterator +__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept + _Compare __comp) noexcept { - return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, - _RandomAccessIterator3 __result, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator3 +__pattern_set_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) { typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; @@ -3522,33 +3631,32 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir // {1} \ {}: parallel copying just first sequence if (__n2 == 0) - return __pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + return __pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, + __internal::__brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}); // testing whether the sequences are intersected _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); //{1} < {2}: seq 2 is wholly greater than seq 1, so, parallel copying just first sequence if (__left_bound_seq_1 == __last1) - return __pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + return __pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, + __internal::__brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}); // testing whether the sequences are intersected _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); //{2} < {1}: seq 1 is wholly greater than seq 2, so, parallel copying just first sequence if (__left_bound_seq_2 == __last2) - return __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}); if (__n1 + __n2 > __set_algo_cut_off) return __parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType) { return __n; }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_difference_construct(__first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - __is_vector); + }); // use serial algorithm return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); @@ -3578,26 +3686,27 @@ __brick_set_symmetric_difference(_RandomAccessIterator1 __first1, _RandomAccessI return ::std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, +template +_OutputIterator +__pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept + _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + return __internal::__brick_set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp, - __is_vector); + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, - _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator3 +__pattern_set_symmetric_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, + _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __result, _Compare __comp) { - const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -3606,14 +3715,15 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _RandomAccessItera return ::std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; - return __internal::__parallel_set_union_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_symmetric_difference_construct( - __first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - __is_vector); + return __internal::__except_handler([&]() { + return __internal::__parallel_set_union_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_symmetric_difference_construct( + __first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); + }); + }); } //------------------------------------------------------------------------ @@ -3639,12 +3749,14 @@ __brick_is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __las [&__comp](_RandomAccessIterator __it, _SizeType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +_RandomAccessIterator +__pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept { - return __internal::__brick_is_heap_until(__first, __last, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_is_heap_until(__first, __last, __comp, typename _Tag::__is_vector{}); } template @@ -3669,18 +3781,18 @@ __is_heap_until_local(_RandomAccessIterator __first, _DifferenceType __begin, _D [&__comp](_RandomAccessIterator __it, _DifferenceType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +_RandomAccessIterator +__pattern_is_heap_until(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { return __internal::__except_handler([&]() { - return __parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, - __comp, __is_vector); - }, - ::std::true_type{}); + return __parallel_find( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, __comp, _IsVector{}); + }, + ::std::true_type{}); }); } @@ -3725,24 +3837,26 @@ __is_heap_local(_RandomAccessIterator __first, _DifferenceType __begin, _Differe }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +bool +__pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept { - return __internal::__brick_is_heap(__first, __last, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_is_heap(__first, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +bool +__pattern_is_heap(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { return __internal::__except_handler([&]() { - return !__parallel_or(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return !__parallel_or(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) { return !__internal::__is_heap_local(__first, __i - __first, __j - __first, __comp, - __is_vector); + _IsVector{}); }); }); } @@ -3771,30 +3885,34 @@ __brick_min_element(_RandomAccessIterator __first, _RandomAccessIterator __last, #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_min_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +_ForwardIterator +__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Compare __comp) noexcept { - return __internal::__brick_min_element(__first, __last, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_min_element(__first, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_min_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +_RandomAccessIterator +__pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + // a trivial case pre-check if (__last - __first < 2) return __first; return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __init) -> _RandomAccessIterator { const _RandomAccessIterator __subresult = - __internal::__brick_min_element(__begin, __end, __comp, __is_vector); + __internal::__brick_min_element(__begin, __end, __comp, _IsVector{}); return __init == __last ? __subresult : __internal::__cmp_iterators_by_values(__init, __subresult, __comp, oneapi::dpl::__internal::__pstl_less()); @@ -3834,21 +3952,23 @@ __brick_minmax_element(_RandomAccessIterator __first, _RandomAccessIterator __la #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator, _ForwardIterator>> -__pattern_minmax_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +::std::pair<_ForwardIterator, _ForwardIterator> +__pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Compare __comp) noexcept { - return __internal::__brick_minmax_element(__first, __last, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_minmax_element(__first, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator, _RandomAccessIterator>> -__pattern_minmax_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +::std::pair<_RandomAccessIterator, _RandomAccessIterator> +__pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + // a trivial case pre-check if (__last - __first < 2) return ::std::make_pair(__first, __first); @@ -3857,10 +3977,10 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs typedef ::std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ ::std::make_pair(__last, __last), [=, &__comp](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> _Result { - const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, __is_vector); + const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, _IsVector{}); if (__init.first == __last) // = identity return __subresult; return ::std::make_pair( @@ -3913,31 +4033,30 @@ __brick_mismatch(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1 return __unseq_backend::__simd_first(__first1, __n, __first2, __not_pred<_Predicate&>(__pred)); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator1, _ForwardIterator2>> -__pattern_mismatch(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Predicate __pred, _IsVector __is_vector, - /* is_parallel = */ ::std::false_type) noexcept +template +::std::pair<_ForwardIterator1, _ForwardIterator2> +__pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Predicate __pred) noexcept { - return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2>> -__pattern_mismatch(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Predicate __pred, - _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> +__pattern_mismatch(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Predicate __pred) { return __internal::__except_handler([&]() { auto __n = ::std::min(__last1 - __first1, __last2 - __first2); auto __result = __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, - [__first1, __first2, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - __pred, __is_vector) + __pred, _IsVector{}) .first; }, ::std::true_type{}); @@ -3995,22 +4114,24 @@ __brick_lexicographical_compare(_RandomAccessIterator1 __first1, _RandomAccessIt } } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +bool +__pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) noexcept { - return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, - _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _Compare __comp, _IsVector __is_vector, - /* is_parallel = */ ::std::true_type) +template +bool +__pattern_lexicographical_compare(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, + _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Compare __comp) noexcept { if (__first2 == __last2) { // if second sequence is empty @@ -4024,29 +4145,33 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterat { typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _RefType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _RefType2; - --__last1; - --__last2; - auto __n = ::std::min(__last1 - __first1, __last2 - __first2); - auto __result = __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, - [__first1, __first2, &__comp, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - [&__comp](const _RefType1 __x, const _RefType2 __y) { - return !__comp(__x, __y) && !__comp(__y, __x); - }, - __is_vector) - .first; - }, - ::std::true_type{}); - if (__result == __last1 && __first2 + (__result - __first1) != __last2) - { // if first sequence shorter than second - return !__comp(*(__first2 + (__result - __first1)), *__result); - } - else - { // if second sequence shorter than first or both have the same number of elements - return __comp(*__result, *(__first2 + (__result - __first1))); - } + return __internal::__except_handler([&]() { + --__last1; + --__last2; + auto __n = ::std::min(__last1 - __first1, __last2 - __first2); + auto __result = __internal::__parallel_find( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, &__comp](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__brick_mismatch( + __i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), + [&__comp](const _RefType1 __x, const _RefType2 __y) { + return !__comp(__x, __y) && !__comp(__y, __x); + }, + _IsVector{}) + .first; + }, + ::std::true_type{}); + + if (__result == __last1 && __first2 + (__result - __first1) != __last2) + { // if first sequence shorter than second + return !__comp(*(__first2 + (__result - __first1)), *__result); + } + else + { // if second sequence shorter than first or both have the same number of elements + return __comp(*__result, *(__first2 + (__result - __first1))); + } + }); } } @@ -4054,14 +4179,14 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterat // swap //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_swap(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, _IsVector __is_vector, _IsParallel __is_parallel) +template +_ForwardIterator2 +__pattern_swap(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Function __f) { - return __pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f, __is_vector, - __is_parallel); + static_assert(__is_host_dispatch_tag_v<_Tag>); + + return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f); } //------------------------------------------------------------------------ @@ -4132,21 +4257,24 @@ __brick_shift_left(_ForwardIterator __first, _ForwardIterator __last, return __first + __size_res; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - typename ::std::iterator_traits<_ForwardIterator>::difference_type __n, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_shift_left(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + typename ::std::iterator_traits<_ForwardIterator>::difference_type __n) noexcept { - return __brick_shift_left(__first, __last, __n, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick_shift_left(__first, __last, __n, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - typename ::std::iterator_traits<_ForwardIterator>::difference_type __n, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, + typename ::std::iterator_traits<_RandomAccessIterator>::difference_type __n) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. if (__n <= 0) return __last; @@ -4154,47 +4282,51 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa if (__n >= __size) return __first; - using _DiffType = typename ::std::iterator_traits<_ForwardIterator>::difference_type; + using _DiffType = typename ::std::iterator_traits<_RandomAccessIterator>::difference_type; _DiffType __mid = __size / 2 + __size % 2; _DiffType __size_res = __size - __n; - //1. n >= size/2; there is enough memory to 'total' parallel copying - if (__n >= __mid) - { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __n, __size, - [__first, __n, __is_vector](_DiffType __i, _DiffType __j) { - __brick_move<_ExecutionPolicy>{}(__first + __i, __first + __j, - __first + __i - __n, __is_vector); - }); - } - else //2. n < size/2; there is not enough memory to parallel copying; doing parallel copying by n elements - { - //TODO: to consider parallel processing by the 'internal' loop (but we may probably get cache locality issues) - for (auto __k = __n; __k < __size; __k += __n) + return __internal::__except_handler([&]() { + //1. n >= size/2; there is enough memory to 'total' parallel copying + if (__n >= __mid) { - auto __end = ::std::min(__k + __n, __size); - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __k, __end, - [__first, __n, __is_vector](_DiffType __i, _DiffType __j) { - __brick_move<_ExecutionPolicy>{}(__first + __i, __first + __j, - __first + __i - __n, __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __size, + [__first, __n](_DiffType __i, _DiffType __j) { + __brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __first + __i, __first + __j, __first + __i - __n, _IsVector{}); }); } - } + else //2. n < size/2; there is not enough memory to parallel copying; doing parallel copying by n elements + { + //TODO: to consider parallel processing by the 'internal' loop (but we may probably get cache locality issues) + for (auto __k = __n; __k < __size; __k += __n) + { + auto __end = ::std::min(__k + __n, __size); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __k, __end, + [__first, __n](_DiffType __i, _DiffType __j) { + __brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __first + __i, __first + __j, __first + __i - __n, _IsVector{}); + }); + } + } - return __first + __size_res; + return __first + __size_res; + }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_shift_right(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, - typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n, - _IsVector __is_vector, _IsParallel is_parallel) +template +_BidirectionalIterator +__pattern_shift_right(_Tag __tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, + _BidirectionalIterator __last, + typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n) { + static_assert(__is_host_dispatch_tag_v<_Tag>); + using _ReverseIterator = typename ::std::reverse_iterator<_BidirectionalIterator>; - auto __res = oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), - _ReverseIterator(__last), _ReverseIterator(__first), __n, - __is_vector, is_parallel); + + auto __res = oneapi::dpl::__internal::__pattern_shift_left( + __tag, ::std::forward<_ExecutionPolicy>(__exec), _ReverseIterator(__last), _ReverseIterator(__first), __n); return __res.base(); } diff --git a/include/oneapi/dpl/pstl/execution_defs.h b/include/oneapi/dpl/pstl/execution_defs.h index d16a030b216..26287ccbf6e 100644 --- a/include/oneapi/dpl/pstl/execution_defs.h +++ b/include/oneapi/dpl/pstl/execution_defs.h @@ -31,88 +31,20 @@ inline namespace v1 // 2.4, Sequential execution policy class sequenced_policy { - public: - // For internal use only - static constexpr ::std::false_type - __allow_unsequenced() - { - return ::std::false_type{}; - } - static constexpr ::std::false_type - __allow_vector() - { - return ::std::false_type{}; - } - static constexpr ::std::false_type - __allow_parallel() - { - return ::std::false_type{}; - } }; // 2.5, Parallel execution policy class parallel_policy { - public: - // For internal use only - static constexpr ::std::false_type - __allow_unsequenced() - { - return ::std::false_type{}; - } - static constexpr ::std::false_type - __allow_vector() - { - return ::std::false_type{}; - } - static constexpr ::std::true_type - __allow_parallel() - { - return ::std::true_type{}; - } }; // 2.6, Parallel+Vector execution policy class parallel_unsequenced_policy { - public: - // For internal use only - static constexpr ::std::true_type - __allow_unsequenced() - { - return ::std::true_type{}; - } - static constexpr ::std::true_type - __allow_vector() - { - return ::std::true_type{}; - } - static constexpr ::std::true_type - __allow_parallel() - { - return ::std::true_type{}; - } }; class unsequenced_policy { - public: - // For internal use only - static constexpr ::std::true_type - __allow_unsequenced() - { - return ::std::true_type{}; - } - static constexpr ::std::true_type - __allow_vector() - { - return ::std::true_type{}; - } - static constexpr ::std::false_type - __allow_parallel() - { - return ::std::false_type{}; - } }; // 2.8, Execution policy objects @@ -180,14 +112,6 @@ template using __enable_if_execution_policy = ::std::enable_if_t>, _T>; -template -using __enable_if_host_execution_policy = - ::std::enable_if_t<__is_host_execution_policy<::std::decay_t<_ExecPolicy>>::value, _T>; - -template -using __enable_if_host_execution_policy_conditional = - ::std::enable_if_t<__is_host_execution_policy<::std::decay_t<_ExecPolicy>>::value && __condition, _T>; - template struct __ref_or_copy_impl { @@ -213,6 +137,22 @@ __check_size(...) -> typename ::std::iterator_traits<_It>::difference_type; template using __difference_t = ::std::make_signed_t(0))>; +//------------------------------------------------------------------------ +// backend tags +//------------------------------------------------------------------------ + +struct __serial_backend_tag +{ +}; + +struct __tbb_backend_tag +{ +}; + +struct __omp_backend_tag +{ +}; + } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index 70631a27114..133717bf68e 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -19,6 +19,7 @@ #include #include +#include "parallel_backend.h" #include "execution_defs.h" #include "iterator_defs.h" @@ -29,100 +30,134 @@ namespace dpl namespace __internal { -/* predicate */ +//------------------------------------------------------------------------ +// backend selector with tags +//------------------------------------------------------------------------ + +#if _ONEDPL_PAR_BACKEND_TBB +using __par_backend_tag = __tbb_backend_tag; +#elif _ONEDPL_PAR_BACKEND_OPENMP +using __par_backend_tag = __omp_backend_tag; +#elif _ONEDPL_PAR_BACKEND_SERIAL +using __par_backend_tag = __serial_backend_tag; +#else +# error "Parallel backend was not specified" +#endif + +template +struct __serial_tag +{ + using __is_vector = _IsVector; +}; -template -::std::false_type __lazy_and(_Tp, ::std::false_type) +template +struct __parallel_tag { - return ::std::false_type{}; -} + using __is_vector = _IsVector; + using __backend_tag = __par_backend_tag; +}; -template -inline _Tp -__lazy_and(_Tp __a, ::std::true_type) +struct __parallel_forward_tag { - return __a; + using __is_vector = ::std::false_type; + using __backend_tag = __par_backend_tag; +}; + +//---------------------------------------------------------- +// __select_backend (for the host policies) +//---------------------------------------------------------- + +template +using __parallel_policy_tag_selector_t = ::std::conditional_t< + __internal::__is_random_access_iterator_v<_IteratorTypes...>, __parallel_tag<_IsVector>, + ::std::conditional_t<__is_forward_iterator_v<_IteratorTypes...>, __parallel_forward_tag, __serial_tag<_IsVector>>>; + +template +__serial_tag +__select_backend(oneapi::dpl::execution::sequenced_policy, _IteratorTypes&&...) +{ + return {}; } -template -::std::true_type __lazy_or(_Tp, ::std::true_type) +template +__serial_tag<__internal::__is_random_access_iterator<_IteratorTypes...>> +__select_backend(oneapi::dpl::execution::unsequenced_policy, _IteratorTypes&&...) { - return ::std::true_type{}; + return {}; } -template -inline _Tp -__lazy_or(_Tp __a, ::std::false_type) +template +__parallel_policy_tag_selector_t +__select_backend(oneapi::dpl::execution::parallel_policy, _IteratorTypes&&...) { - return __a; + return {}; } -/* policy */ -template -struct __policy_traits +template +__parallel_policy_tag_selector_t<__internal::__is_random_access_iterator<_IteratorTypes...>, _IteratorTypes...> +__select_backend(oneapi::dpl::execution::parallel_unsequenced_policy, _IteratorTypes&&...) { -}; + return {}; +} -template <> -struct __policy_traits +//---------------------------------------------------------- +// __is_serial_tag, __is_serial_tag_v +//---------------------------------------------------------- + +template +struct __is_serial_tag : ::std::false_type { - typedef ::std::false_type __allow_parallel; - typedef ::std::false_type __allow_unsequenced; - typedef ::std::false_type __allow_vector; }; -template <> -struct __policy_traits +template +struct __is_serial_tag<__serial_tag<_IsVector>> : ::std::true_type { - typedef ::std::false_type __allow_parallel; - typedef ::std::true_type __allow_unsequenced; - typedef ::std::true_type __allow_vector; }; -template <> -struct __policy_traits +template +inline constexpr bool __is_serial_tag_v = __is_serial_tag<_Tag>::value; + +//---------------------------------------------------------- +// __is_parallel_forward_tag, __is_parallel_forward_tag_v +//---------------------------------------------------------- + +template +struct __is_parallel_forward_tag : ::std::false_type { - typedef ::std::true_type __allow_parallel; - typedef ::std::false_type __allow_unsequenced; - typedef ::std::false_type __allow_vector; }; template <> -struct __policy_traits +struct __is_parallel_forward_tag<__parallel_forward_tag> : ::std::true_type { - typedef ::std::true_type __allow_parallel; - typedef ::std::true_type __allow_unsequenced; - typedef ::std::true_type __allow_vector; }; -template -using __allow_vector = typename __internal::__policy_traits<::std::decay_t<_ExecutionPolicy>>::__allow_vector; - -template -using __allow_unsequenced = typename __internal::__policy_traits<::std::decay_t<_ExecutionPolicy>>::__allow_unsequenced; +template +inline constexpr bool __is_parallel_forward_tag_v = __is_parallel_forward_tag<_Tag>::value; -template -using __allow_parallel = typename __internal::__policy_traits<::std::decay_t<_ExecutionPolicy>>::__allow_parallel; +//---------------------------------------------------------- +// __is_parallel_tag, __is_parallel_tag_v +//---------------------------------------------------------- -template -auto -__is_vectorization_preferred(_ExecutionPolicy& __exec) - -> decltype(__internal::__lazy_and(__exec.__allow_vector(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>())) +template +struct __is_parallel_tag : ::std::false_type { - return __internal::__lazy_and(__exec.__allow_vector(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>()); -} +}; -template -auto -__is_parallelization_preferred(_ExecutionPolicy& __exec) - -> decltype(__internal::__lazy_and(__exec.__allow_parallel(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>())) +template +struct __is_parallel_tag<__parallel_tag<_IsVector>> : ::std::true_type { - return __internal::__lazy_and(__exec.__allow_parallel(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>()); -} +}; + +template +inline constexpr bool __is_parallel_tag_v = __is_parallel_tag<_Tag>::value; + +//---------------------------------------------------------- +// __is_host_dispatch_tag_v +//---------------------------------------------------------- + +template +inline constexpr bool __is_host_dispatch_tag_v = + __is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag> || __is_parallel_tag_v<_Tag>; } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop.h index 4a61dd7c09b..5d8802083d3 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop.h @@ -56,6 +56,9 @@ template void for_loop(_ExecutionPolicy&& __exec, type_identity_t<_Ip> __start, _Ip __finish, _Rest&&... __rest) { + static_assert(oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, + "for_loop is implemented for the host policies only"); + oneapi::dpl::__internal::__for_loop_repack(::std::forward<_ExecutionPolicy>(__exec), __start, __finish, oneapi::dpl::__internal::__single_stride_type{}, ::std::forward_as_tuple(::std::forward<_Rest>(__rest)...)); @@ -65,6 +68,9 @@ template __start, _Ip __finish, _Sp __stride, _Rest&&... __rest) { + static_assert(oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, + "for_loop_strided is implemented for the host policies only"); + oneapi::dpl::__internal::__for_loop_repack(::std::forward<_ExecutionPolicy>(__exec), __start, __finish, __stride, ::std::forward_as_tuple(::std::forward<_Rest>(__rest)...)); } @@ -73,6 +79,9 @@ template >::value, + "for_loop_n is implemented for the host policies only"); + oneapi::dpl::__internal::__for_loop_repack_n(::std::forward<_ExecutionPolicy>(__exec), __start, __n, oneapi::dpl::__internal::__single_stride_type{}, ::std::forward_as_tuple(::std::forward<_Rest>(__rest)...)); @@ -82,6 +91,9 @@ template >::value, + "for_loop_n_strided is implemented for the host policies only"); + oneapi::dpl::__internal::__for_loop_repack_n(::std::forward<_ExecutionPolicy>(__exec), __start, __n, __stride, ::std::forward_as_tuple(::std::forward<_Rest>(__rest)...)); } diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index d7f738036c3..47769c4645d 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -65,7 +65,7 @@ struct __difference<_Ip, ::std::enable_if_t<::std::is_integral_v<_Ip>>> template struct __difference<_Ip, ::std::enable_if_t>> { - using __type = typename oneapi::dpl::__internal::__iterator_traits<_Ip>::difference_type; + using __type = typename ::std::iterator_traits<_Ip>::difference_type; }; // This type is used as a stride value when it's known that stride == 1 at compile time(the case of for_loop and for_loop_n). @@ -232,9 +232,9 @@ __pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function } template -::std::enable_if_t<::std::is_same_v::iterator_category, - ::std::bidirectional_iterator_tag>, - _IndexType> +::std::enable_if_t< + ::std::is_same_v::iterator_category, ::std::bidirectional_iterator_tag>, + _IndexType> __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pack& __pack, _IndexType) noexcept { _IndexType __ordinal_position = 0; @@ -269,11 +269,10 @@ __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pa } template -::std::enable_if_t<::std::is_same_v::iterator_category, - ::std::forward_iterator_tag> || - ::std::is_same_v::iterator_category, - ::std::input_iterator_tag>, - _IndexType> +::std::enable_if_t< + ::std::is_same_v::iterator_category, ::std::forward_iterator_tag> || + ::std::is_same_v::iterator_category, ::std::input_iterator_tag>, + _IndexType> __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pack& __pack, _IndexType) noexcept { _IndexType __ordinal_position = 0; @@ -398,26 +397,27 @@ __pattern_for_loop_n(_ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Functio // Create an identity pack object, operations are done on copies of it. const __pack_type __identity{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; + using __backend_tag = typename oneapi::dpl::__internal::__parallel_tag<_IsVector>::__backend_tag; oneapi::dpl::__internal::__except_handler([&]() { - return __par_backend::__parallel_reduce(::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, - [__is_vector, __first, __f](_Size __i, _Size __j, __pack_type __value) { - const auto __subseq_start = __first + __i; - const auto __length = __j - __i; - - oneapi::dpl::__internal::__brick_walk1( - __length, - [&__value, __f, __i, __subseq_start](_Size __idx) { - __value.__apply_func(__f, __subseq_start + __idx, - __i + __idx); - }, - __is_vector); - - return __value; - }, - [](__pack_type __lhs, const __pack_type& __rhs) { - __lhs.__combine(__rhs); - return __lhs; - }) + return __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, + [__is_vector, __first, __f](_Size __i, _Size __j, __pack_type __value) { + const auto __subseq_start = __first + __i; + const auto __length = __j - __i; + + oneapi::dpl::__internal::__brick_walk1( + __length, + [&__value, __f, __i, __subseq_start](_Size __idx) { + __value.__apply_func(__f, __subseq_start + __idx, __i + __idx); + }, + __is_vector); + + return __value; + }, + [](__pack_type __lhs, const __pack_type& __rhs) { + __lhs.__combine(__rhs); + return __lhs; + }) .__finalize(__n); }); } @@ -433,9 +433,10 @@ __pattern_for_loop_n(_ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Functio // Create an identity pack object, operations are done on copies of it. const __pack_type __identity{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; + using __backend_tag = typename oneapi::dpl::__internal::__parallel_tag<_IsVector>::__backend_tag; oneapi::dpl::__internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, [__is_vector, __first, __f, __stride](_Size __i, _Size __j, __pack_type __value) { const auto __subseq_start = __first + __i * __stride; const auto __length = __j - __i; @@ -472,48 +473,25 @@ __pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function // Helper structure to split code functions for integral and iterator types so the return // value can be successfully deduced. -template -struct __use_par_vec_helper; - template -struct __use_par_vec_helper<_Ip, ::std::enable_if_t<::std::is_integral_v<_Ip>>> +struct __use_par_vec_helper { - template - static constexpr auto - __use_vector(_ExecutionPolicy&& __exec) -> decltype(__exec.__allow_vector()) - { - return __exec.__allow_vector(); - } - - template - static constexpr auto - __use_parallel(_ExecutionPolicy&& __exec) -> decltype(__exec.__allow_parallel()) - { - return __exec.__allow_parallel(); - } -}; + using __it_type = std::conditional_t, _Ip*, _Ip>; -template -struct __use_par_vec_helper<_Ip, ::std::enable_if_t>> -{ template static constexpr auto __use_vector(_ExecutionPolicy&& __exec) - -> decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec))) { - return oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec)); + using __tag_type = decltype(oneapi::dpl::__internal::__select_backend(__exec, std::declval<__it_type>())); + return typename __tag_type::__is_vector{}; } template static constexpr auto __use_parallel(_ExecutionPolicy&& __exec) - -> decltype(oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec))) { - return oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec)); + using __tag_type = decltype(oneapi::dpl::__internal::__select_backend(__exec, std::declval<__it_type>())); + return oneapi::dpl::__internal::__is_parallel_tag<__tag_type>{}; } }; @@ -521,7 +499,6 @@ struct __use_par_vec_helper<_Ip, ::std::enable_if_t>> template auto __use_vectorization(_ExecutionPolicy&& __exec) - -> decltype(__use_par_vec_helper<_Ip>::__use_vector(::std::forward<_ExecutionPolicy>(__exec))) { return __use_par_vec_helper<_Ip>::__use_vector(::std::forward<_ExecutionPolicy>(__exec)); } @@ -529,7 +506,6 @@ __use_vectorization(_ExecutionPolicy&& __exec) template auto __use_parallelization(_ExecutionPolicy&& __exec) - -> decltype(__use_par_vec_helper<_Ip>::__use_parallel(::std::forward<_ExecutionPolicy>(__exec))) { return __use_par_vec_helper<_Ip>::__use_parallel(::std::forward<_ExecutionPolicy>(__exec)); } diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 952087c68ad..0d17726cc24 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -43,10 +43,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_any_of( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_any_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } // [alg.all_of] @@ -75,20 +75,20 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> for_each(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) { - oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __f, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - __exec.__allow_parallel()); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __f); } template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> for_each_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f) { - return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __f, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_walk1_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __n, __f); } // [alg.find] @@ -97,10 +97,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_find_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_find_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } template @@ -129,12 +129,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_find_end( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __s_first); + + return oneapi::dpl::__internal::__pattern_find_end(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __s_first, __s_last, __pred); } template @@ -152,12 +150,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_find_first_of( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __s_first); + + return oneapi::dpl::__internal::__pattern_find_first_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __s_first, __s_last, __pred); } template @@ -175,22 +171,23 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last) { typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; - return oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, ::std::equal_to<_ValueType>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__first_semantic()); + + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_adjacent_find(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, ::std::equal_to<_ValueType>(), + oneapi::dpl::__internal::__first_semantic()); } template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__first_semantic()); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_adjacent_find(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred, + oneapi::dpl::__internal::__first_semantic()); } // [alg.count] @@ -203,12 +200,12 @@ oneapi::dpl::__internal::__enable_if_execution_policy< _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> count(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + return oneapi::dpl::__internal::__pattern_count( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__equal_value>( - __value), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + __value)); } template @@ -216,10 +213,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy< _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> count_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_count( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_count(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } // [alg.search] @@ -229,12 +226,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_search( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __s_first); + + return oneapi::dpl::__internal::__pattern_search(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __s_first, __s_last, __pred); } template @@ -251,10 +246,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_search_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __count, __value, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_search_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __count, __value, __pred); } template @@ -272,11 +267,11 @@ template copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result) { + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__brick_copy{}); } template @@ -285,11 +280,11 @@ copy_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Size __n, _Forward { using _DecayedExecutionPolicy = ::std::decay_t<_ExecutionPolicy>; + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + return oneapi::dpl::__internal::__pattern_walk2_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__brick_copy_n{}); } template @@ -297,12 +292,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_copy_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_copy_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __result, __pred); } // [alg.swap] @@ -314,16 +307,14 @@ swap_ranges(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardItera { typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; - return oneapi::dpl::__internal::__pattern_swap( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, - [](_ReferenceType1 __x, _ReferenceType2 __y) { - using ::std::swap; - swap(__x, __y); - }, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_swap(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, [](_ReferenceType1 __x, _ReferenceType2 __y) { + using ::std::swap; + swap(__x, __y); + }); } // [alg.transform] @@ -333,12 +324,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryOperation __op) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - __exec.__allow_parallel()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}); } // we can't use non-const __op here @@ -348,12 +338,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator __result, _BinaryOperation __op) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + return oneapi::dpl::__internal::__pattern_walk3( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, - oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - __exec.__allow_parallel()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, + oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op))); } // [alg.transform_if] @@ -364,14 +353,12 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryOperation __op, _UnaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + return oneapi::dpl::__internal::__pattern_walk2_transform_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__transform_if_unary_functor<_UnaryOperation, _UnaryPredicate>(::std::move(__op), - ::std::move(__pred)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + ::std::move(__pred))); } template (__exec), __first1, __last1, __first2, __result, - oneapi::dpl::__internal::__transform_if_binary_functor<_BinaryOperation, _BinaryPredicate>(::std::move(__op), - ::std::move(__pred)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator3>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator3>(__exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, + oneapi::dpl::__internal::__transform_if_binary_functor<_BinaryOperation, _BinaryPredicate>( + ::std::move(__op), ::std::move(__pred))); } // [alg.replace] @@ -397,13 +382,13 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, const _Tp& __new_value) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__replace_functor< oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, - oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred)); } template @@ -423,18 +408,16 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward replace_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryPredicate __pred, const _Tp& __new_value) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__replace_copy_functor< oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, ::std::conditional_t, _UnaryPredicate, oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>>( - __new_value, __pred), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __new_value, __pred)); } template @@ -455,10 +438,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - oneapi::dpl::__internal::__pattern_fill( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __value, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_fill(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __value); } template @@ -468,10 +451,10 @@ fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, const if (__count <= 0) return __first; - return oneapi::dpl::__internal::__pattern_fill_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __count, __value, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_fill_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __count, __value); } // [alg.generate] @@ -479,10 +462,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g) { - oneapi::dpl::__internal::__pattern_generate( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __g, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_generate(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __g); } template @@ -492,10 +475,10 @@ generate_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, _ if (__count <= 0) return __first; - return oneapi::dpl::__internal::__pattern_generate_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __count, __g, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_generate_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __count, __g); } // [alg.remove] @@ -526,10 +509,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> remove_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_remove_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_remove_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template @@ -548,10 +531,10 @@ template unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_unique( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_unique(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } template @@ -567,12 +550,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_unique_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_unique_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __pred); } template @@ -589,10 +570,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last) { - oneapi::dpl::__internal::__pattern_reverse( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_reverse(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last); } template @@ -600,12 +581,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, _ForwardIterator __d_first) { - return oneapi::dpl::__internal::__pattern_reverse_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator, - _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __d_first); + + return oneapi::dpl::__internal::__pattern_reverse_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __d_first); } // [alg.rotate] @@ -614,10 +593,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> rotate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { - return oneapi::dpl::__internal::__pattern_rotate( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_rotate(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __middle, __last); } template @@ -625,12 +604,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __middle, _ForwardIterator1 __last, _ForwardIterator2 __result) { - return oneapi::dpl::__internal::__pattern_rotate_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __result, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_rotate_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __middle, __last, __result); } // [alg.partitions] @@ -639,20 +616,20 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_partitioned(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_is_partitioned( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_is_partitioned(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> partition(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_partition( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template @@ -660,10 +637,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Bidirec stable_partition(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_stable_partition( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template (__exec), __first, __last, __out_true, __out_false, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, - _ForwardIterator2>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, - _ForwardIterator2>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __out_true, __out_false); + + return oneapi::dpl::__internal::__pattern_partition_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __out_true, __out_false, __pred); } // [alg.sort] @@ -687,12 +662,12 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _InputType; - oneapi::dpl::__internal::__pattern_sort( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - typename ::std::is_move_constructible<_InputType>::type()); + + oneapi::dpl::__internal::__pattern_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __comp, typename ::std::is_move_constructible<_InputType>::type()); } template @@ -709,10 +684,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_stable_sort( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_stable_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __comp); } template @@ -731,12 +706,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp) { - oneapi::dpl::__internal::__pattern_sort_by_key( - ::std::forward<_ExecutionPolicy>(__exec), __keys_first, __keys_last, __values_first, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, - _RandomAccessIterator2>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator1, - _RandomAccessIterator2>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __keys_first, __values_first); + + oneapi::dpl::__internal::__pattern_sort_by_key(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __keys_first, __keys_last, __values_first, __comp); } template @@ -756,12 +729,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_mismatch( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_mismatch(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __pred); } template @@ -802,10 +773,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __p) { - return oneapi::dpl::__internal::__pattern_equal( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __p, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, __p); } template @@ -821,10 +792,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __p) { - return oneapi::dpl::__internal::__pattern_equal( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __p, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, __last2, __p); } template @@ -843,11 +814,11 @@ move(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __l { using _DecayedExecutionPolicy = ::std::decay_t<_ExecutionPolicy>; + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __d_first); + return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, - oneapi::dpl::__internal::__brick_move<_DecayedExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, + oneapi::dpl::__internal::__brick_move{}); } // [partial.sort] @@ -857,10 +828,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_partial_sort( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_partial_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __middle, __last, __comp); } template @@ -879,12 +850,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomA partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __d_first); + return oneapi::dpl::__internal::__pattern_partial_sort_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __d_last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, - _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, - _RandomAccessIterator>(__exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __d_last, __comp); } template @@ -901,12 +870,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + const _ForwardIterator __res = oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__first_semantic()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), oneapi::dpl::__internal::__first_semantic()); return __res == __last ? __last : oneapi::dpl::__internal::__pstl_next(__res); } @@ -922,12 +890,12 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__or_semantic()) == __last; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_adjacent_find(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, + oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), + oneapi::dpl::__internal::__or_semantic()) == __last; } template @@ -945,12 +913,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __d_first, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_merge( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __d_first); + + return oneapi::dpl::__internal::__pattern_merge(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, __last2, __d_first, __comp); } template @@ -967,10 +933,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_inplace_merge( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_inplace_merge(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __middle, __last, __comp); } template @@ -989,12 +955,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_includes( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_includes(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __comp); } template @@ -1014,12 +978,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_set_union( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + + return oneapi::dpl::__internal::__pattern_set_union(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __result, __comp); } template @@ -1039,12 +1001,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_set_intersection( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + + return oneapi::dpl::__internal::__pattern_set_intersection(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __result, __comp); } template @@ -1064,12 +1024,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_set_difference( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + + return oneapi::dpl::__internal::__pattern_set_difference(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __result, __comp); } template @@ -1090,12 +1048,11 @@ set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + return oneapi::dpl::__internal::__pattern_set_symmetric_difference( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, + __comp); } template @@ -1112,10 +1069,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator> is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_is_heap_until( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_is_heap_until(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __comp); } template @@ -1130,10 +1087,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_is_heap( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_is_heap(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __comp); } template @@ -1150,10 +1107,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_min_element( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_min_element(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __comp); } template @@ -1185,10 +1142,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, ::std::pair<_ForwardIterator, _ForwardIterator>> minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_minmax_element( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_minmax_element(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __comp); } template @@ -1206,10 +1163,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_nth_element( - ::std::forward<_ExecutionPolicy>(__exec), __first, __nth, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_nth_element(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __nth, __last, __comp); } template @@ -1228,12 +1185,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + return oneapi::dpl::__internal::__pattern_lexicographical_compare( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp); } template @@ -1252,10 +1207,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, typename ::std::iterator_traits<_ForwardIterator>::difference_type __n) { - return oneapi::dpl::__internal::__pattern_shift_left( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __n, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_shift_left(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __n); } // [shift.right] @@ -1265,10 +1220,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Bidirec shift_right(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n) { - return oneapi::dpl::__internal::__pattern_shift_right( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __n, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_shift_right(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __n); } } // namespace dpl diff --git a/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h index bbe5cc72ac8..4e51d0e4e2a 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h @@ -39,7 +39,9 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> any_of(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_any_of(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_any_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __pred); } @@ -70,7 +72,9 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> for_each(_ExecutionPolicy&& __exec, _Range&& __rng, _Function __f) { - oneapi::dpl::__internal::__ranges::__pattern_walk_n(::std::forward<_ExecutionPolicy>(__exec), __f, + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + oneapi::dpl::__internal::__ranges::__pattern_walk_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __f, views::all(::std::forward<_Range>(__rng))); } @@ -80,7 +84,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> find_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_find_if(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_find_if(__dispatch_tag, + ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __pred); } @@ -111,8 +118,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range1>> find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_find_end( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __pred); } @@ -132,8 +141,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range1>> find_first_of(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_find_first_of( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __pred); } @@ -152,9 +163,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> adjacent_find(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + return oneapi::dpl::__internal::__ranges::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __pred, - oneapi::dpl::__internal::__first_semantic()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), + __pred, oneapi::dpl::__internal::__first_semantic()); } template @@ -172,7 +185,9 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> count_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_count(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_count(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __pred); } @@ -193,8 +208,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range1>> search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_search( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __pred); } @@ -211,9 +228,11 @@ template > search_n(_ExecutionPolicy&& __exec, _Range&& __rng, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_search_n(::std::forward<_ExecutionPolicy>(__exec), - views::all_read(::std::forward<_Range>(__rng)), - __count, __value, __pred); + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_search_n( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), + __count, __value, __pred); } template @@ -230,8 +249,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> copy(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result) { + auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng, __result); + oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__internal::__brick_copy{}, views::all_read(::std::forward<_Range1>(__rng)), views::all_write(::std::forward<_Range2>(__result))); } @@ -240,8 +262,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range2>> copy_if(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _Predicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng, __result); + return oneapi::dpl::__internal::__ranges::__pattern_copy_if( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng)), views::all_write(::std::forward<_Range2>(__result)), __pred, oneapi::dpl::__internal::__pstl_assign()); } @@ -252,11 +276,13 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range1>> swap_ranges(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + using _ReferenceType1 = oneapi::dpl::__internal::__value_t<_Range1>&; using _ReferenceType2 = oneapi::dpl::__internal::__value_t<_Range2>&; return oneapi::dpl::__internal::__ranges::__pattern_swap( - ::std::forward<_ExecutionPolicy>(__exec), views::all(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all(::std::forward<_Range1>(__rng1)), views::all(::std::forward<_Range2>(__rng2)), [](_ReferenceType1 __x, _ReferenceType2 __y) { using ::std::swap; swap(__x, __y); @@ -269,8 +295,10 @@ template transform(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _UnaryOperation __op) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng, __result); + oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), [__op](auto x, auto& z) { z = __op(x); }, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), [__op](auto x, auto& z) { z = __op(x); }, views::all_read(::std::forward<_Range1>(__rng)), views::all_write(::std::forward<_Range2>(__result))); } @@ -278,8 +306,10 @@ template transform(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __result, _BinaryOperation __op) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2, __result); + oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), [__op](auto x, auto y, auto& z) { z = __op(x, y); }, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), [__op](auto x, auto y, auto& z) { z = __op(x, y); }, views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), views::all_write(::std::forward<_Range3>(__result))); } @@ -290,8 +320,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> remove_if(_ExecutionPolicy&& __exec, _Range&& __rng, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_remove_if(::std::forward<_ExecutionPolicy>(__exec), - views::all(::std::forward<_Range>(__rng)), __pred); + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_remove_if( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all(::std::forward<_Range>(__rng)), __pred); } template @@ -332,7 +364,9 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> unique(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_unique(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_unique(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all(::std::forward<_Range>(__rng)), __pred); } @@ -349,8 +383,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range2>> unique_copy(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _BinaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng, __result); + return oneapi::dpl::__internal::__ranges::__pattern_unique_copy( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng)), views::all_write(::std::forward<_Range2>(__result)), __pred, oneapi::dpl::__internal::__pstl_assign()); } @@ -406,8 +442,10 @@ template replace_if(_ExecutionPolicy&& __exec, _Range&& __rng, _UnaryPredicate __pred, const _Tp& __new_value) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__internal::__replace_functor< oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred), @@ -431,9 +469,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, replace_copy_if(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _UnaryPredicate __pred, const _Tp& __new_value) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng, __result); + auto __src = views::all_read(::std::forward<_Range1>(__rng)); oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__internal::__replace_copy_functor< oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, ::std::conditional_t, @@ -463,7 +503,9 @@ template sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) { - oneapi::dpl::__internal::__ranges::__pattern_sort(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + oneapi::dpl::__internal::__ranges::__pattern_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all(::std::forward<_Range>(__rng)), __comp, __proj); } @@ -498,10 +540,12 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> is_sorted_until(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + auto __view = views::all_read(::std::forward<_Range>(__rng)); const auto __res = oneapi::dpl::__internal::__ranges::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __view, oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__first_semantic()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __view, + oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), oneapi::dpl::__internal::__first_semantic()); return __res == __view.size() ? __res : __res + 1; } @@ -518,9 +562,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_sorted(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + auto __view = views::all_read(::std::forward<_Range>(__rng)); return oneapi::dpl::__internal::__ranges::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __view, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __view, oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), oneapi::dpl::__internal::__or_semantic()) == __view.size(); } @@ -539,7 +585,9 @@ template equal(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryPredicate __p) { - return oneapi::dpl::__internal::__ranges::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + + return oneapi::dpl::__internal::__ranges::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __p); } @@ -558,10 +606,13 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> move(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2) { + auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + using _DecayedExecutionPolicy = ::std::decay_t<_ExecutionPolicy>; oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__internal::__brick_move<_DecayedExecutionPolicy>{}, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__internal::__brick_move{}, views::all_read(::std::forward<_Range1>(__rng1)), views::all_write(::std::forward<_Range2>(__rng2))); } @@ -572,8 +623,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range3>> merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2, __rng3); + return oneapi::dpl::__internal::__ranges::__pattern_merge( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), views::all_write(::std::forward<_Range3>(__rng3)), __comp); } @@ -593,8 +646,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> min_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + return oneapi::dpl::__internal::__ranges::__pattern_min_element( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __comp); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), + __comp); } template @@ -628,8 +684,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy< ::std::pair, oneapi::dpl::__internal::__difference_t<_Range>>> minmax_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + return oneapi::dpl::__internal::__ranges::__pattern_minmax_element( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __comp); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), + __comp); } template @@ -649,8 +708,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op) { + const auto __dispatch_tag = + oneapi::dpl::__ranges::__select_backend(__exec, __keys, __values, __out_keys, __out_values); + return oneapi::dpl::__internal::__ranges::__pattern_reduce_by_segment( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__keys)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__keys)), views::all_read(::std::forward<_Range2>(__values)), views::all_write(::std::forward<_Range3>(__out_keys)), views::all_write(::std::forward<_Range4>(__out_values)), __binary_pred, __binary_op); } diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index 082856131e7..fac93889dfb 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -45,25 +45,19 @@ uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__brick_copy{}); } else { return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}); } } @@ -75,25 +69,19 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { return oneapi::dpl::__internal::__pattern_walk2_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__brick_copy_n{}); } else { return oneapi::dpl::__internal::__pattern_walk2_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}); } } @@ -107,25 +95,19 @@ uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__brick_copy{}); } else { return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}); } } @@ -137,25 +119,19 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { return oneapi::dpl::__internal::__pattern_walk2_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__brick_copy_n{}); } else { return oneapi::dpl::__internal::__pattern_walk2_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}); } } @@ -168,24 +144,20 @@ uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forward typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); if constexpr (::std::is_arithmetic_v<_ValueType>) { oneapi::dpl::__internal::__pattern_walk_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__brick_fill{ + _ValueType(__value)}); } else { oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}); } } @@ -196,27 +168,46 @@ uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); if constexpr (::std::is_arithmetic_v<_ValueType>) { return oneapi::dpl::__internal::__pattern_walk_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__brick_fill_n<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__brick_fill_n{ + _ValueType(__value)}); } else { return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}); } } +#if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) + +const oneapi::dpl::execution::parallel_policy& +get_unvectorized_policy(const oneapi::dpl::execution::parallel_unsequenced_policy&) +{ + return oneapi::dpl::execution::par; +} + +const oneapi::dpl::execution::sequenced_policy& +get_unvectorized_policy(const oneapi::dpl::execution::unsequenced_policy&) +{ + return oneapi::dpl::execution::seq; +} + +template +const _ExecutionPolicy& +get_unvectorized_policy(const _ExecutionPolicy& __exec) +{ + return __exec; +} + +#endif // (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) + // [specialized.destroy] template @@ -226,25 +217,17 @@ destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - using _is_vector_type = + if constexpr (!::std::is_trivially_destructible_v<_ValueType>) + { + const auto __dispatch_tag = #if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) - ::std::conditional_t< - oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, - ::std::false_type, - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>( - __exec))>; + oneapi::dpl::__internal::__select_backend(get_unvectorized_policy(__exec), __first); #else - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); -#endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN - constexpr _is_vector_type __is_vector; + oneapi::dpl::__internal::__select_backend(__exec, __first); +#endif - if constexpr (!::std::is_trivially_destructible_v<_ValueType>) - { - oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [](_ReferenceType __val) { __val.~_ValueType(); }, __is_vector, __is_parallel); + oneapi::dpl::__internal::__pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, [](_ReferenceType __val) { __val.~_ValueType(); }); } } @@ -255,29 +238,22 @@ destroy_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n) typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - using _is_vector_type = -#if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) - ::std::conditional_t< - oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, - ::std::false_type, - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>( - __exec))>; -#else - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); -#endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN - constexpr _is_vector_type __is_vector; - if constexpr (::std::is_trivially_destructible_v<_ValueType>) { return oneapi::dpl::__internal::__pstl_next(__first, __n); } else { - return oneapi::dpl::__internal::__pattern_walk1_n(::std::forward<_ExecutionPolicy>(__exec), __first, __n, - [](_ReferenceType __val) { __val.~_ValueType(); }, - __is_vector, __is_parallel); + const auto __dispatch_tag = +#if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) + oneapi::dpl::__internal::__select_backend(get_unvectorized_policy(__exec), __first); +#else + oneapi::dpl::__internal::__select_backend(__exec, __first); +#endif + + return oneapi::dpl::__internal::__pattern_walk1_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __n, + [](_ReferenceType __val) { __val.~_ValueType(); }); } } @@ -290,17 +266,13 @@ uninitialized_default_construct(_ExecutionPolicy&& __exec, _ForwardIterator __fi typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - if constexpr (!::std::is_trivial_v<_ValueType>) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}); } } @@ -311,21 +283,17 @@ uninitialized_default_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - if constexpr (::std::is_trivial_v<_ValueType>) { return oneapi::dpl::__internal::__pstl_next(__first, __n); } else { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}); } } @@ -338,24 +306,20 @@ uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __firs typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); if constexpr (::std::is_trivial_v<_ValueType>) { oneapi::dpl::__internal::__pattern_walk_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType()}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__brick_fill{ + _ValueType()}); } else { oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}); } } @@ -366,24 +330,20 @@ uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __fi typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); if constexpr (::std::is_trivial_v<_ValueType>) { return oneapi::dpl::__internal::__pattern_walk_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__brick_fill_n<_ValueType, _DecayedExecutionPolicy>{_ValueType()}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__brick_fill_n{ + _ValueType()}); } else { return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}); } } diff --git a/include/oneapi/dpl/pstl/glue_numeric_impl.h b/include/oneapi/dpl/pstl/glue_numeric_impl.h index f2564db3132..17ed09d0ca4 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_impl.h +++ b/include/oneapi/dpl/pstl/glue_numeric_impl.h @@ -70,13 +70,12 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forward _ForwardIterator2 __first2, _Tp __init) { typedef typename ::std::iterator_traits<_ForwardIterator1>::value_type _InputType; + + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + return oneapi::dpl::__internal::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, ::std::plus<_InputType>(), - ::std::multiplies<_InputType>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, + ::std::plus<_InputType>(), ::std::multiplies<_InputType>()); } template transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { - return oneapi::dpl::__internal::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, __binary_op1, __binary_op2, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_transform_reduce(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __init, __binary_op1, + __binary_op2); } template @@ -98,10 +96,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { - return oneapi::dpl::__internal::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, __unary_op, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_transform_reduce(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __init, __binary_op, __unary_op); } // [exclusive.scan] @@ -225,13 +223,11 @@ transform_exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { - return oneapi::dpl::__internal::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __init, __binary_op, - /*inclusive=*/::std::false_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __unary_op, __init, __binary_op, + /*inclusive=*/::std::false_type()); } // [transform.inclusive.scan] @@ -243,13 +239,11 @@ transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ _ForwardIterator2 __result, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _Tp __init) { - return oneapi::dpl::__internal::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __init, __binary_op, - /*inclusive=*/::std::true_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __unary_op, __init, __binary_op, + /*inclusive=*/::std::true_type()); } template (__exec), __first, __last, __result, __unary_op, __binary_op, - /*inclusive=*/::std::true_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __unary_op, __binary_op, + /*inclusive=*/::std::true_type()); } // [adjacent.difference] @@ -274,16 +266,13 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first, _BinaryOperation __op) { - if (__first == __last) return __d_first; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __d_first); + return oneapi::dpl::__internal::__pattern_adjacent_difference( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __op, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __op); } template diff --git a/include/oneapi/dpl/pstl/glue_numeric_ranges_impl.h b/include/oneapi/dpl/pstl/glue_numeric_ranges_impl.h index 42d7c6e15a3..521ebee46b7 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_ranges_impl.h +++ b/include/oneapi/dpl/pstl/glue_numeric_ranges_impl.h @@ -63,9 +63,11 @@ template transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Tp __init) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + using _ValueType = oneapi::dpl::__internal::__value_t<_Range1>; return oneapi::dpl::__internal::__ranges::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __init, ::std::plus<_ValueType>(), ::std::multiplies<_ValueType>()); } @@ -76,8 +78,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __init, __binary_op1, __binary_op2); } @@ -86,9 +90,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> transform_reduce(_ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { - return oneapi::dpl::__internal::__ranges::__pattern_transform_reduce(::std::forward<_ExecutionPolicy>(__exec), - views::all_read(::std::forward<_Range>(__rng)), - __init, __binary_op, __unary_op); + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_transform_reduce( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), + __init, __binary_op, __unary_op); } // [exclusive.scan] @@ -154,8 +160,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, transform_exclusive_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_write(::std::forward<_Range2>(__rng2)), __unary_op, __init, __binary_op, /*inclusive=*/::std::false_type()); } @@ -169,8 +177,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, transform_inclusive_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _Tp __init) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_write(::std::forward<_Range2>(__rng2)), __unary_op, __init, __binary_op, /*inclusive=*/::std::true_type()); } @@ -182,8 +192,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, transform_inclusive_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_write(::std::forward<_Range2>(__rng2)), __unary_op, __binary_op, /*inclusive=*/::std::true_type()); } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 677be98e975..4ffc45b3047 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -39,10 +39,10 @@ namespace __internal // walk1 //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +void +__pattern_walk1(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Function __f) { auto __n = __last - __first; if (__n <= 0) @@ -52,9 +52,8 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); auto __buf = __keep(__first, __last); - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), - unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf.all_view()) + oneapi::dpl::__par_backend_hetero::__parallel_for( + _BackendTag{}, __exec, unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf.all_view()) .wait(); } @@ -62,13 +61,13 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte // walk1_n //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk1_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_ForwardIterator +__pattern_walk1_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, + _Function __f) { - __pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f, - /*vector=*/::std::true_type(), /*parallel=*/::std::true_type()); + __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f); return __first + __n; } @@ -82,10 +81,11 @@ __pattern_walk1_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) + typename _BackendTag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2, + typename _Function> +_ForwardIterator2 +__pattern_walk2(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) { auto __n = __last1 - __first1; if (__n <= 0) @@ -98,8 +98,8 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI auto __buf2 = __keep2(__first2, __first2 + __n); auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view()); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view()); if constexpr (_IsSync()) __future_obj.wait(); @@ -107,45 +107,42 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI return __first2 + __n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, - _Function __f, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_ForwardIterator2 +__pattern_walk2_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, + _ForwardIterator2 __first2, _Function __f) { - return __pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f, - ::std::true_type(), ::std::true_type()); + return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f); } //------------------------------------------------------------------------ // swap //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_swap(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, /*is_vector=*/::std::true_type, - /*is_parallel=*/::std::true_type) +template +_ForwardIterator2 +__pattern_swap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) { return __pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), - __first1, __last1, __first2, __f, - ::std::true_type(), ::std::true_type()); + __par_backend_hetero::access_mode::read_write>( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f); } //------------------------------------------------------------------------ // walk3 //------------------------------------------------------------------------ -template <__par_backend_hetero::access_mode __acc_mode1 = __par_backend_hetero::access_mode::read, +template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +_ForwardIterator3 +__pattern_walk3(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) { auto __n = __last1 - __first1; if (__n <= 0) @@ -158,7 +155,7 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode3, _ForwardIterator3>(); auto __buf3 = __keep3(__first3, __first3 + __n); - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view(), __buf3.all_view()) .wait(); @@ -175,18 +172,18 @@ struct __walk_brick_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f, - /*parallel=*/::std::true_type) +template +void +__pattern_walk_brick(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Function __f) { if (__last - __first <= 0) return; __pattern_walk1( + __tag, __par_backend_hetero::make_wrapped_policy<__walk_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first, __last, __f, - /*vector=*/::std::true_type{}, /*parallel=*/::std::true_type{}); + __first, __last, __f); } template @@ -194,15 +191,16 @@ struct __walk_brick_n_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f, - /*parallel=*/::std::true_type) +template +_ForwardIterator +__pattern_walk_brick_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, + _Function __f) { __pattern_walk1( + __tag, __par_backend_hetero::make_wrapped_policy<__walk_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first, __first + __n, __f, - /*vector=*/::std::true_type{}, /*parallel=*/::std::true_type{}); + __first, __first + __n, __f); return __first + __n; } @@ -215,15 +213,16 @@ struct __walk2_brick_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick, /*parallel*/ ::std::true_type) +template +_ForwardIterator2 +__pattern_walk2_brick(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) { return __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy<__walk2_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __first2, __brick, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + __first1, __last1, __first2, __brick); } template @@ -231,17 +230,16 @@ struct __walk2_brick_n_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, - _Brick __brick, /*parallel*/ ::std::true_type) +template +_ForwardIterator2 +__pattern_walk2_brick_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _Size __n, _ForwardIterator2 __first2, _Brick __brick) { - return __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy<__walk2_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first1, __first1 + __n, __first2, __brick, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + __first1, __first1 + __n, __first2, __brick); } //------------------------------------------------------------------------ @@ -253,21 +251,20 @@ struct __walk2_transform_if_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __func, - /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +_ForwardIterator2 +__pattern_walk2_transform_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __func) { // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming // values of the output sequence for elements where the predicate is false. return __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy<__walk2_transform_if_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __first2, __func, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + __first1, __last1, __first2, __func); } template @@ -275,22 +272,21 @@ struct __walk3_transform_if_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __func, - /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +_ForwardIterator3 +__pattern_walk3_transform_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, + _Function __func) { // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming // values of the output sequence for elements where the predicate is false. - return __pattern_walk3<__par_backend_hetero::access_mode::read, __par_backend_hetero::access_mode::read, - __par_backend_hetero::access_mode::read_write>( + return __pattern_walk3<_BackendTag, __par_backend_hetero::access_mode::read, + __par_backend_hetero::access_mode::read, __par_backend_hetero::access_mode::read_write>( + __tag, __par_backend_hetero::make_wrapped_policy<__walk3_transform_if_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __first2, __first3, __func, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + __first1, __last1, __first2, __first3, __func); } //------------------------------------------------------------------------ @@ -309,18 +305,26 @@ struct fill_functor } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _T& __value, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_ForwardIterator +__pattern_fill(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, const _T& __value) { - __pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), + __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), - fill_functor<_T>{__value}, ::std::true_type{}, ::std::true_type{}); + fill_functor<_T>{__value}); return __last; } +template +_ForwardIterator +__pattern_fill_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, + const _T& __value) +{ + return __pattern_fill(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __value); +} + //------------------------------------------------------------------------ // generate //------------------------------------------------------------------------ @@ -338,24 +342,32 @@ struct generate_functor } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_ForwardIterator +__pattern_generate(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Generator __g) { - __pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), + __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), - generate_functor<_Generator>{__g}, ::std::true_type{}, ::std::true_type{}); + generate_functor<_Generator>{__g}); return __last; } +template +_ForwardIterator +__pattern_generate_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _Size __count, _Generator __g) +{ + return __pattern_generate(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __g); +} + //------------------------------------------------------------------------ // brick_copy, brick_move //------------------------------------------------------------------------ -template -struct __brick_copy_n<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy>> +template +struct __brick_copy_n<__hetero_tag<_BackendTag>, _ExecutionPolicy> { template void @@ -365,48 +377,46 @@ struct __brick_copy_n<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_het } }; -template -struct __brick_copy<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy>> +template +struct __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy> { template - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> + void operator()(_SourceT&& __source, _TargetT&& __target) const { __target = ::std::forward<_SourceT>(__source); } }; -template -struct __brick_move<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy>> +template +struct __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy> { template - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> + void operator()(_SourceT&& __source, _TargetT&& __target) const { __target = ::std::move(__source); } }; -template -struct __brick_fill<_SourceT, _ExecutionPolicy, - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy>> +template +struct __brick_fill<__hetero_tag<_BackendTag>, _ExecutionPolicy, _SourceT> { _SourceT __value; template - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> + void operator()(_TargetT& __target) const { __target = __value; } }; -template -struct __brick_fill_n<_SourceT, _ExecutionPolicy, - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy>> +template +struct __brick_fill_n<__hetero_tag<_BackendTag>, _ExecutionPolicy, _SourceT> { _SourceT __value; template - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> + void operator()(_TargetT& __target) const { __target = __value; @@ -417,10 +427,10 @@ struct __brick_fill_n<_SourceT, _ExecutionPolicy, // min_element, max_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_min_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator +__pattern_min_element(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp) { if (__first == __last) return __last; @@ -467,7 +477,7 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ auto __buf = __keep(__first, __last); auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -493,10 +503,10 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ // However the solution requires use of custom pattern or substantial redesign of existing parallel_transform_reduce. // -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, ::std::pair<_Iterator, _Iterator>> -__pattern_minmax_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +::std::pair<_Iterator, _Iterator> +__pattern_minmax_element(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp) { if (__first == __last) return ::std::make_pair(__first, __first); @@ -535,7 +545,7 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator auto __ret = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -547,11 +557,10 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator // adjacent_find //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _BinaryPredicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type, - oneapi::dpl::__internal::__or_semantic) +template +_Iterator +__pattern_adjacent_find(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _BinaryPredicate __predicate, oneapi::dpl::__internal::__or_semantic) { if (__last - __first < 2) return __last; @@ -564,11 +573,11 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf2 = __keep2(__first + 1, __last); - // TODO: in case of confilicting names + // TODO: in case of conflicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() bool result = __par_backend_hetero::__parallel_find_or( - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, - __par_backend_hetero::__parallel_or_tag{}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); // inverted conditional because of @@ -576,11 +585,10 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator return result ? __first : __last; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _BinaryPredicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type, - oneapi::dpl::__internal::__first_semantic) +template +_Iterator +__pattern_adjacent_find(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _BinaryPredicate __predicate, oneapi::dpl::__internal::__first_semantic) { if (__last - __first < 2) return __last; @@ -589,7 +597,7 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, adjacent_find_fn<_BinaryPredicate>>; auto __result = __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first + 1)), @@ -609,11 +617,10 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator // count, count_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_Iterator>::difference_type> -__pattern_count(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Predicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type) +template +typename ::std::iterator_traits<_Iterator>::difference_type +__pattern_count(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Predicate __predicate) { if (__first == __last) return 0; @@ -632,7 +639,7 @@ __pattern_count(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -642,10 +649,10 @@ __pattern_count(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, // any_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +bool +__pattern_any_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Pred __pred) { if (__first == __last) return false; @@ -656,6 +663,7 @@ __pattern_any_of(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, __par_backend_hetero::__parallel_or_tag{}, __buf.all_view()); @@ -665,11 +673,10 @@ __pattern_any_of(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, // equal //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, - _Iterator2 __last2, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +bool +__pattern_equal(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Iterator2 __last2, _Pred __pred) { if (__last1 == __first1 || __last2 == __first2 || __last1 - __first1 != __last2 - __first2) return false; @@ -681,10 +688,10 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); auto __buf2 = __keep2(__first2, __last2); - // TODO: in case of confilicting names + // TODO: in case of conflicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !__par_backend_hetero::__parallel_find_or( - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); } @@ -693,24 +700,23 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las // equal version for sequences with equal length //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +bool +__pattern_equal(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Pred __pred) { - return oneapi::dpl::__internal::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - __first2, __first2 + (__last1 - __first1), __pred, - /*vector=*/::std::true_type{}, /*parallel=*/::std::true_type{}); + return oneapi::dpl::__internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __first2, __first2 + (__last1 - __first1), __pred); } //------------------------------------------------------------------------ // find_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_find_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator +__pattern_find_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Pred __pred) { if (__first == __last) return __last; @@ -718,7 +724,7 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, _Pred>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__pred}, ::std::true_type{}); @@ -728,18 +734,18 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last // find_end //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator1> -__pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator1 +__pattern_find_end(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) { if (__first == __last || __s_last == __s_first || __last - __first < __s_last - __s_first) return __last; if (__last - __first == __s_last - __s_first) { - const bool __res = __pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred, - ::std::true_type(), ::std::true_type()); + const bool __res = + __pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred); return __res ? __first : __last; } else @@ -747,7 +753,7 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __l using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -760,10 +766,10 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __l // find_first_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator1> -__pattern_find_first_of(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator1 +__pattern_find_first_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) { if (__first == __last || __s_last == __s_first) return __last; @@ -773,7 +779,7 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator // TODO: To check whether it makes sense to iterate over the second sequence in case of // distance(__first, __last) < distance(__s_first, __s_last). return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -790,10 +796,10 @@ class equal_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator1> -__pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator1 +__pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) { if (__s_last == __s_first) return __first; @@ -804,14 +810,14 @@ __pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __las if (__last - __first == __s_last - __s_first) { const bool __res = __pattern_equal( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __first, - __last, __s_first, __pred, ::std::true_type(), ::std::true_type()); + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __first, __last, __s_first, __pred); return __res ? __first : __last; } using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -837,10 +843,11 @@ struct __search_n_unary_predicate } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_search_n(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Size __count, const _Tp& __value, - _BinaryPredicate __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator +__pattern_search_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Size __count, const _Tp& __value, _BinaryPredicate __pred) { if (__count <= 0) return __first; @@ -850,16 +857,15 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __las if (__last - __first == __count) { - return (!__internal::__pattern_any_of(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __search_n_unary_predicate<_Tp, _BinaryPredicate>{__value, __pred}, - ::std::true_type{}, ::std::true_type{})) + return (!__internal::__pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __search_n_unary_predicate<_Tp, _BinaryPredicate>{__value, __pred})) ? __first : __last; } using _Predicate = unseq_backend::n_elem_match_pred<_ExecutionPolicy, _BinaryPredicate, _Tp, _Size>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__pred, __value, __count}, ::std::true_type{}); @@ -869,10 +875,10 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __las // mismatch //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, ::std::pair<_Iterator1, _Iterator2>> -__pattern_mismatch(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, - _Iterator2 __last2, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +::std::pair<_Iterator1, _Iterator2> +__pattern_mismatch(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Iterator2 __last2, _Pred __pred) { auto __n = ::std::min(__last1 - __first1, __last2 - __first2); if (__n <= 0) @@ -883,9 +889,9 @@ __pattern_mismatch(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __ auto __first_zip = __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2)); - auto __result = - __par_backend_hetero::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first_zip, __first_zip + __n, - _Predicate{equal_predicate<_Pred>{__pred}}, ::std::true_type{}); + auto __result = __par_backend_hetero::__parallel_find( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __first_zip, __first_zip + __n, + _Predicate{equal_predicate<_Pred>{__pred}}, ::std::true_type{}); __n = __result - __first_zip; return ::std::make_pair(__first1 + __n, __first2 + __n); } @@ -894,12 +900,11 @@ __pattern_mismatch(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __ // copy_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy< - _ExecutionPolicy, ::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type>> -__pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _IteratorOrTuple __output_first, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) +template +::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type> +__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _IteratorOrTuple __output_first, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) { using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; @@ -914,18 +919,19 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __ oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>(); auto __buf2 = __keep2(__output_first, __output_first + __n); - auto __res = - __par_backend_hetero::__parallel_scan_copy(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __n, __create_mask_op, __copy_by_mask_op); + auto __res = __par_backend_hetero::__parallel_scan_copy(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __n, __create_mask_op, + __copy_by_mask_op); ::std::size_t __num_copied = __res.get(); return ::std::make_pair(__output_first + __n, __num_copied); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result_first, - _Predicate __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator2 +__pattern_copy_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __result_first, _Predicate __pred) { using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; @@ -939,10 +945,10 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __la auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); auto __buf2 = __keep2(__result_first, __result_first + __n); - auto __res = __par_backend_hetero::__parallel_copy_if(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __n, __pred); + auto __res = __par_backend_hetero::__parallel_copy_if(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __n, __pred); - ::std::size_t __num_copied = __res.get(); + ::std::size_t __num_copied = __res.get(); //is a blocking call return __result_first + __num_copied; } @@ -950,12 +956,11 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __la // partition_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, ::std::pair<_Iterator2, _Iterator3>> -__pattern_partition_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result1, - _Iterator3 __result2, _UnaryPredicate __pred, /*vector*/ ::std::true_type, - /*parallel*/ ::std::true_type) +template +::std::pair<_Iterator2, _Iterator3> +__pattern_partition_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result1, _Iterator3 __result2, _UnaryPredicate __pred) { if (__first == __last) return ::std::make_pair(__result1, __result2); @@ -967,7 +972,7 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterato unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ ::std::true_type> __copy_by_mask_op{_ReduceOp{}}; auto __result = __pattern_scan_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2)), @@ -980,10 +985,11 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterato // unique_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_unique_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result_first, - _BinaryPredicate __pred, /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator2 +__pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __result_first, _BinaryPredicate __pred) { using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, oneapi::dpl::__internal::__pstl_assign, @@ -992,8 +998,8 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{ __not_pred<_BinaryPredicate>{__pred}}; - auto __result = __pattern_scan_copy(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result_first, - __create_mask_op, __copy_by_mask_op); + auto __result = __pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result_first, __create_mask_op, __copy_by_mask_op); return __result_first + __result.second; } @@ -1007,10 +1013,10 @@ class copy_back_wrapper2 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_remove_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Predicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator +__pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Predicate __pred) { if (__last == __first) return __last; @@ -1019,19 +1025,22 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __la oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); auto __copy_first = __buf.get(); - auto __copy_last = __pattern_copy_if(__exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); - //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer - return __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + auto __copy_last = __pattern_copy_if(__tag, __exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}); + + //TODO: To optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer + // __pattern_copy_if above may be async due to there is implicit synchronization on sycl::buffer and the accessors + + //An explicit wait isn't required here because we have implicit synchronization on sycl::buffer destructor. + return __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __copy_first, __copy_last, __first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_unique(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _BinaryPredicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator +__pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _BinaryPredicate __pred) { if (__last - __first < 2) return __last; @@ -1040,14 +1049,15 @@ __pattern_unique(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); auto __copy_first = __buf.get(); - auto __copy_last = __pattern_unique_copy(__exec, __first, __last, __copy_first, __pred, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + auto __copy_last = __pattern_unique_copy(__tag, __exec, __first, __last, __copy_first, __pred); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer - return __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __copy_first, __copy_last, __first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } //------------------------------------------------------------------------ @@ -1062,10 +1072,10 @@ enum _IsPartitionedReduceType : signed char __true_false }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Predicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type) +template +bool +__pattern_is_partitioned(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Predicate __predicate) { if (__last - __first < 2) return true; @@ -1086,7 +1096,7 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -1113,10 +1123,10 @@ struct __is_heap_check } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, /* vector */ ::std::true_type, /* parallel = */ ::std::true_type) +template +_RandomAccessIterator +__pattern_is_heap_until(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { if (__last - __first < 2) return __last; @@ -1125,16 +1135,16 @@ __pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}, ::std::true_type{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, /* vector */ ::std::true_type, /* parallel = */ ::std::true_type) +template +bool +__pattern_is_heap(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { if (__last - __first < 2) return true; @@ -1143,7 +1153,7 @@ __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; return !__par_backend_hetero::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}); } @@ -1151,11 +1161,12 @@ __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran //------------------------------------------------------------------------ // merge //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator3> -__pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, - _Iterator2 __last2, _Iterator3 __d_first, _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) + +template +_Iterator3 +__pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Iterator2 __last2, _Iterator3 __d_first, _Compare __comp) { auto __n1 = __last1 - __first1; auto __n2 = __last2 - __first2; @@ -1166,16 +1177,18 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las //To consider the direct copying pattern call in case just one of sequences is empty. if (__n1 == 0) oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy( ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - ::std::true_type()); + __first2, __last2, __d_first, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); else if (__n2 == 0) oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - ::std::true_type()); + __first1, __last1, __d_first, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); else { auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); @@ -1186,19 +1199,21 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator3>(); auto __buf3 = __keep3(__d_first, __d_first + __n); - __par_backend_hetero::__parallel_merge(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __buf3.all_view(), __comp) + __par_backend_hetero::__parallel_merge(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __buf3.all_view(), __comp) .wait(); } return __d_first + __n; } + //------------------------------------------------------------------------ // inplace_merge //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __middle, _Iterator __last, - _Compare __comp, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) + +template +void +__pattern_inplace_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __middle, _Iterator __last, _Compare __comp) { using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; @@ -1212,26 +1227,28 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator auto __copy_first = __buf.get(); auto __copy_last = __copy_first + __n; - __pattern_merge(__exec, __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__copy_first), - __comp, ::std::true_type{}, ::std::true_type{}); + __pattern_merge( + __tag, __exec, __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__copy_first), __comp); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer - __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __copy_first, __copy_last, __first, __brick_move<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + + //An explicit wait isn't required here because we have implicit synchronization on sycl::buffer destructor. + __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __copy_first, __copy_last, __first, __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } //------------------------------------------------------------------------ // sort //------------------------------------------------------------------------ -template +template void -__stable_sort_with_projection(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - _Proj __proj) +__stable_sort_with_projection(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp, _Proj __proj) { if (__last - __first < 2) return; @@ -1239,60 +1256,58 @@ __stable_sort_with_projection(_ExecutionPolicy&& __exec, _Iterator __first, _Ite auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - __par_backend_hetero::__parallel_stable_sort( - ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __comp, __proj).wait(); + __par_backend_hetero::__parallel_stable_sort(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf.all_view(), __comp, __proj) + .wait(); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type, /*is_move_constructible=*/::std::true_type) +template +void +__pattern_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp, /*is_move_constructible=*/::std::true_type) { - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __stable_sort_with_projection(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::identity{}); } //------------------------------------------------------------------------ // stable_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) + +template +void +__pattern_stable_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp) { - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __stable_sort_with_projection(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::identity{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _Iterator1 __keys_first, _Iterator1 __keys_last, - _Iterator2 __values_first, _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +void +__pattern_sort_by_key(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __keys_first, + _Iterator1 __keys_last, _Iterator2 __values_first, _Compare __comp) { - static_assert(::std::is_move_constructible_v::value_type> - && ::std::is_move_constructible_v::value_type>, - "The keys and values should be move constructible in case of parallel execution."); + static_assert(::std::is_move_constructible_v::value_type> && + ::std::is_move_constructible_v::value_type>, + "The keys and values should be move constructible in case of parallel execution."); auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); auto __end = __beg + (__keys_last - __keys_first); - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __comp, + __stable_sort_with_projection(__tag, ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __comp, [](const auto& __a) { return ::std::get<0>(__a); }); } - -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_stable_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _UnaryPredicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator +__pattern_stable_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _UnaryPredicate __pred) { if (__last == __first) return __last; else if (__last - __first < 2) - return __pattern_any_of(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, ::std::true_type(), - ::std::true_type()) - ? __last - : __first; + return __pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred) ? __last + : __first; using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; @@ -1303,42 +1318,43 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterat auto __true_result = __true_buf.get(); auto __false_result = __false_buf.get(); - auto copy_result = __pattern_partition_copy(__exec, __first, __last, __true_result, __false_result, __pred, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + auto copy_result = __pattern_partition_copy(__tag, __exec, __first, __last, __true_result, __false_result, __pred); auto true_count = copy_result.first - __true_result; //TODO: optimize copy back if possible (inplace, decrease number of submits) - __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(__exec), - __true_result, copy_result.first, __first, __brick_move<_ExecutionPolicy>{}, ::std::true_type{}, - ::std::true_type{}); - __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __false_result, copy_result.second, __first + true_count, __brick_move<_ExecutionPolicy>{}, ::std::true_type{}, - ::std::true_type{}); + __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy(__exec), __true_result, copy_result.first, + __first, __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); + + //We don't need synchronization between these patterns due to the data are being processed independently. + + __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __false_result, copy_result.second, __first + true_count, + __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); + + //An explicit wait isn't required here because we have implicit synchronization on sycl::buffer destructor. return __first + true_count; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _UnaryPredicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator +__pattern_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _UnaryPredicate __pred) { //TODO: consider nonstable approaches - return __pattern_stable_partition(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - ::std::true_type(), ::std::true_type()); + return __pattern_stable_partition(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); } //------------------------------------------------------------------------ // lexicographical_compare //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, - _Iterator2 __first2, _Iterator2 __last2, _Compare __comp, /*vector*/ ::std::true_type, - /*parallel*/ ::std::true_type) +template +bool +__pattern_lexicographical_compare(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first1, + _Iterator1 __last1, _Iterator2 __first2, _Iterator2 __last2, _Compare __comp) { //trivial pre-checks if (__first2 == __last2) @@ -1375,7 +1391,7 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _Iterator1 __first1 auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf1.all_view(), __buf2.all_view()) .get(); @@ -1383,11 +1399,11 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _Iterator1 __first1 return __ret_idx ? __ret_idx == 1 : (__last1 - __first1) < (__last2 - __first2); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +bool +__pattern_includes(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) { //according to the spec if (__first2 == __last2) @@ -1403,7 +1419,7 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa using __brick_include_type = unseq_backend::__brick_includes<_ExecutionPolicy, _Compare, _Size1, _Size2>; return !__par_backend_hetero::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last2), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), @@ -1414,16 +1430,17 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa //------------------------------------------------------------------------ // partial_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, _Iterator __last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) + +template +void +__pattern_partial_sort(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, + _Iterator __last, _Compare __comp) { if (__last - __first < 2) return; __par_backend_hetero::__parallel_partial_sort( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__last), __comp) @@ -1459,11 +1476,11 @@ struct __partial_sort_2 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutIterator> -__pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InIterator __last, - _OutIterator __out_first, _OutIterator __out_last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_OutIterator +__pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _InIterator __first, + _InIterator __last, _OutIterator __out_first, _OutIterator __out_last, _Compare __comp) { using _ValueType = typename ::std::iterator_traits<_InIterator>::value_type; @@ -1480,14 +1497,21 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI { // If our output buffer is larger than the input buffer, simply copy elements to the output and use // full sort on them. - auto __out_end = __pattern_walk2( - __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, __out_first, - __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + auto __out_end = + __pattern_walk2(__tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, + __out_first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); - // Use reqular sort as partial_sort isn't required to be stable + // TODO: __pattern_walk2 is a blocking call here, so there is a synchronization between the patterns. + // But, when the input iterators are a kind of hetero iterator on top of sycl::buffer, SYCL + // runtime makes a dependency graph. In that case the call of __pattern_walk2 could be changed to + // be asynchronous for better performance. + + // Use regular sort as partial_sort isn't required to be stable. + //__pattern_sort is a blocking call. __pattern_sort( + __tag, __par_backend_hetero::make_wrapped_policy<__partial_sort_1>(::std::forward<_ExecutionPolicy>(__exec)), - __out_first, __out_end, __comp, ::std::true_type{}, ::std::true_type{}, ::std::true_type{}); + __out_first, __out_end, __comp, ::std::true_type{}); return __out_end; } @@ -1500,22 +1524,28 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __in_size); auto __buf_first = __buf.get(); + auto __buf_last = __pattern_walk2( - __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, __buf_first, - __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + __tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, __buf_first, + __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); auto __buf_mid = __buf_first + __out_size; + // An explicit wait between the patterns isn't required here because we are working a with temporary + // sycl::buffer and sycl accessors. SYCL runtime makes a dependency graph to prevent the races between + // the patterns: __pattern_walk2, __parallel_partial_sort and __pattern_walk2. + __par_backend_hetero::__parallel_partial_sort( - __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_last), __comp); - return __pattern_walk2( - __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), - __buf_first, __buf_mid, __out_first, __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, - ::std::true_type{}); + return __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), + __buf_first, __buf_mid, __out_first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); + + //An explicit wait isn't required here because we have implicit synchronization on sycl::buffer destructor. } } @@ -1523,10 +1553,10 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI // nth_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __nth, _Iterator __last, _Compare __comp, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +void +__pattern_nth_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __nth, + _Iterator __last, _Compare __comp) { if (__first == __last || __nth == __last) return; @@ -1534,17 +1564,16 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ // TODO: check partition-based implementation // - try to avoid host dereference issue // - measure performance of the issue-free implementation - __pattern_partial_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, __comp, - /*vector*/ ::std::true_type{}, /*parallel*/ ::std::true_type{}); + __pattern_partial_sort(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, __comp); } //------------------------------------------------------------------------ // reverse //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) + +template +void +__pattern_reverse(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last) { auto __n = __last - __first; if (__n <= 0) @@ -1553,7 +1582,7 @@ __pattern_reverse(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__reverse_functor::difference_type>{__n}, __n / 2, __buf.all_view()) .wait(); @@ -1562,10 +1591,11 @@ __pattern_reverse(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last //------------------------------------------------------------------------ // reverse_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, - _ForwardIterator __result, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) + +template +_ForwardIterator +__pattern_reverse_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, + _BidirectionalIterator __last, _ForwardIterator __result) { auto __n = __last - __first; if (__n <= 0) @@ -1578,7 +1608,7 @@ __pattern_reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator>(); auto __buf2 = __keep2(__result, __result + __n); oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__reverse_copy::difference_type>{__n}, __n, __buf1.all_view(), __buf2.all_view()) .wait(); @@ -1599,10 +1629,10 @@ class __rotate_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_rotate(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_first, _Iterator __last, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator +__pattern_rotate(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_first, + _Iterator __last) { auto __n = __last - __first; if (__n <= 0) @@ -1614,21 +1644,27 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_f auto __buf = __keep(__first, __last); auto __temp_buf = oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _Tp>(__exec, __n); - auto __temp_rng = + auto __temp_rng_w = oneapi::dpl::__ranges::all_view<_Tp, __par_backend_hetero::access_mode::write>(__temp_buf.get_buffer()); const auto __shift = __new_first - __first; oneapi::dpl::__par_backend_hetero::__parallel_for( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, - __buf.all_view(), __temp_rng); + __buf.all_view(), __temp_rng_w); - using _Function = __brick_move<_ExecutionPolicy>; + //An explicit wait isn't required here because we are working with a temporary sycl::buffer and sycl accessors and + //SYCL runtime makes a dependency graph to prevent the races between two __parallel_for patterns. + + using _Function = __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>; auto __brick = unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __brick, __n, - __temp_rng, __buf.all_view()) - .wait(); + auto __temp_rng_rw = + oneapi::dpl::__ranges::all_view<_Tp, __par_backend_hetero::access_mode::read_write>(__temp_buf.get_buffer()); + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __brick, + __n, __temp_rng_rw, __buf.all_view()); + + //An explicit wait doesn't need here because we have implicit synchronization (and wait) on sycl::buffer destructor. return __first + (__last - __new_first); } @@ -1636,11 +1672,11 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_f //------------------------------------------------------------------------ // rotate_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __new_first, - _BidirectionalIterator __last, _ForwardIterator __result, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) + +template +_ForwardIterator +__pattern_rotate_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, + _BidirectionalIterator __new_first, _BidirectionalIterator __last, _ForwardIterator __result) { auto __n = __last - __first; if (__n <= 0) @@ -1656,7 +1692,7 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, const auto __shift = __new_first - __first; oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, __buf1.all_view(), __buf2.all_view()) @@ -1665,12 +1701,12 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, return __result + __n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_hetero_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsOpDifference) +template +_OutputIterator +__pattern_hetero_set_op(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp, _IsOpDifference) { typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _Size1; typedef typename ::std::iterator_traits<_ForwardIterator2>::difference_type _Size2; @@ -1709,7 +1745,7 @@ __pattern_hetero_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ auto __result_size = __par_backend_hetero::__parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::make_zip_view( __buf1.all_view(), __buf2.all_view(), oneapi::dpl::__ranges::all_view( @@ -1730,19 +1766,19 @@ __pattern_hetero_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ return __result + __result_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_OutputIterator +__pattern_set_intersection(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp) { // intersection is empty if (__first1 == __last1 || __first2 == __last2) return __result; - return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, - __result, __comp, unseq_backend::_IntersectionTag()); + return __pattern_hetero_set_op(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + __last2, __result, __comp, unseq_backend::_IntersectionTag()); } //Dummy names to avoid kernel problems @@ -1751,13 +1787,12 @@ class __set_difference_copy_case_1 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +_OutputIterator +__pattern_set_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp) { // {} \ {2}: the difference is empty if (__first1 == __last1) @@ -1767,13 +1802,15 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, if (__first2 == __last2) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_difference_copy_case_1>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first1, __last1, __result, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } - return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, - __result, __comp, unseq_backend::_DifferenceTag()); + return __pattern_hetero_set_op(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + __last2, __result, __comp, unseq_backend::_DifferenceTag()); } //Dummy names to avoid kernel problems @@ -1787,12 +1824,12 @@ class __set_union_copy_case_2 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_OutputIterator +__pattern_set_union(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp) { if (__first1 == __last1 && __first2 == __last2) return __result; @@ -1801,18 +1838,22 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forw if (__first1 == __last1) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_1>( ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first2, __last2, __result, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } //{2} is empty if (__first2 == __last2) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first1, __last1, __result, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } typedef typename ::std::iterator_traits<_OutputIterator>::value_type _ValueType; @@ -1823,15 +1864,17 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forw auto __buf = __diff.get(); //1. Calc difference {2} \ {1} - const auto __n_diff = oneapi::dpl::__internal::__pattern_hetero_set_op(__exec,__first2, __last2, __first1, __last1, - __buf,__comp, unseq_backend::_DifferenceTag() - ) - __buf; + const auto __n_diff = + oneapi::dpl::__internal::__pattern_hetero_set_op(__tag, __exec, __first2, __last2, __first1, __last1, __buf, + __comp, unseq_backend::_DifferenceTag()) - + __buf; + //2. Merge {1} and the difference return oneapi::dpl::__internal::__pattern_merge( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __buf, __buf + __n_diff, __result, __comp, - /*vector=*/::std::true_type(), /*parallel=*/::std::true_type()); + __first1, __last1, __buf, __buf + __n_diff, __result, __comp); } //Dummy names to avoid kernel problems @@ -1862,13 +1905,12 @@ class __set_symmetric_difference_phase_2 // 1. Calc difference {1} \ {2} // 2. Calc difference {2} \ {1} // 3. Merge the differences -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +_OutputIterator +__pattern_set_symmetric_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, + _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp) { if (__first1 == __last1 && __first2 == __last2) return __result; @@ -1877,18 +1919,22 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 if (__first1 == __last1) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_1>( ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first2, __last2, __result, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } //{2} is empty if (__first2 == __last2) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_2>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first1, __last1, __result, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } typedef typename ::std::iterator_traits<_OutputIterator>::value_type _ValueType; @@ -1904,21 +1950,21 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 //1. Calc difference {1} \ {2} const auto __n_diff_1 = oneapi::dpl::__internal::__pattern_hetero_set_op( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_1>(__exec), + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_1>(__exec), __first1, __last1, __first2, __last2, __buf_1, __comp, unseq_backend::_DifferenceTag()) - __buf_1; //2. Calc difference {2} \ {1} const auto __n_diff_2 = oneapi::dpl::__internal::__pattern_hetero_set_op( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_2>(__exec), + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_2>(__exec), __first2, __last2, __first1, __last1, __buf_2, __comp, unseq_backend::_DifferenceTag()) - __buf_2; //3. Merge the differences - return oneapi::dpl::__internal::__pattern_merge(::std::forward<_ExecutionPolicy>(__exec), __buf_1, + return oneapi::dpl::__internal::__pattern_merge(__tag, ::std::forward<_ExecutionPolicy>(__exec), __buf_1, __buf_1 + __n_diff_1, __buf_2, __buf_2 + __n_diff_2, __result, - __comp, ::std::true_type(), ::std::true_type()); + __comp); } template @@ -1926,10 +1972,10 @@ class __shift_left_right { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_shift_left(_ExecutionPolicy&& __exec, _Range __rng, oneapi::dpl::__internal::__difference_t<_Range> __n) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_shift_left(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range __rng, + oneapi::dpl::__internal::__difference_t<_Range> __n) { //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. using _DiffType = oneapi::dpl::__internal::__difference_t<_Range>; @@ -1943,21 +1989,22 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _Range __rng, oneapi::dpl::__int //1. n >= size/2; 'size - _n' parallel copying if (__n >= __mid) { - using _Function = __brick_move<_ExecutionPolicy>; + using _Function = __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>; auto __brick = oneapi::dpl::unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; //TODO: to consider use just "read" access mode for a source range and just "write" - for a destination range. auto __src = oneapi::dpl::__ranges::drop_view_simple<_Range, _DiffType>(__rng, __n); auto __dst = oneapi::dpl::__ranges::take_view_simple<_Range, _DiffType>(__rng, __size_res); - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __brick, __size_res, - __src, __dst) + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __brick, __size_res, __src, __dst) .wait(); } else //2. n < size/2; 'n' parallel copying { auto __brick = unseq_backend::__brick_shift_left<_ExecutionPolicy, _DiffType>{__size, __n}; oneapi::dpl::__par_backend_hetero::__parallel_for( + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__shift_left_right>( ::std::forward<_ExecutionPolicy>(__exec)), __brick, __n, __rng) @@ -1967,11 +2014,10 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _Range __rng, oneapi::dpl::__int return __size_res; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_shift_left(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, - typename ::std::iterator_traits<_Iterator>::difference_type __n, /*vector=*/::std::true_type, - /*is_parallel=*/::std::true_type) +template +_Iterator +__pattern_shift_left(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + typename ::std::iterator_traits<_Iterator>::difference_type __n) { //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. auto __size = __last - __first; @@ -1983,16 +2029,15 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __l auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - auto __res = - oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __n); + auto __res = oneapi::dpl::__internal::__pattern_shift_left(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __buf.all_view(), __n); return __first + __res; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_shift_right(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, - typename ::std::iterator_traits<_Iterator>::difference_type __n, /*vector=*/::std::true_type, - /*is_parallel=*/::std::true_type) +template +_Iterator +__pattern_shift_right(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + typename ::std::iterator_traits<_Iterator>::difference_type __n) { //If (n > 0 && n < m), returns first + n. Otherwise, if n > 0, returns last. Otherwise, returns first. auto __size = __last - __first; @@ -2006,7 +2051,8 @@ __pattern_shift_right(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ //A shift right is the shift left with a reverse logic. auto __rng = oneapi::dpl::__ranges::reverse_view_simple{__buf.all_view()}; - auto __res = oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), __rng, __n); + auto __res = + oneapi::dpl::__internal::__pattern_shift_left(__tag, ::std::forward<_ExecutionPolicy>(__exec), __rng, __n); return __last - __res; } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index bee1d1f1f69..b9064b204d2 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -39,14 +39,14 @@ namespace __ranges // walk_n //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_walk_n(_ExecutionPolicy&& __exec, _Function __f, _Ranges&&... __rngs) +template +void +__pattern_walk_n(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Function __f, _Ranges&&... __rngs) { auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); if (__n > 0) { - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, ::std::forward<_Ranges>(__rngs)...) .wait(); @@ -67,13 +67,15 @@ class __swap2_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_swap(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Function __f) +template +bool +__pattern_swap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Function __f) { if (__rng1.size() <= __rng2.size()) { oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__swap1_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, __rng1, __rng2); @@ -81,6 +83,7 @@ __pattern_swap(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _F } oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__swap2_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, __rng2, __rng1); @@ -91,19 +94,19 @@ __pattern_swap(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _F // equal //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +bool +__pattern_equal(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) { if (__rng1.empty() || __rng2.empty() || __rng1.size() != __rng2.size()) return false; using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, equal_predicate<_Pred>>; - // TODO: in case of confilicting names + // TODO: in case of conflicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !oneapi::dpl::__par_backend_hetero::__parallel_find_or( - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, oneapi::dpl::__par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::zip_view(::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2))); } @@ -112,10 +115,9 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _ // find_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_find_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_find_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) { //trivial pre-checks if (__rng.empty()) @@ -125,6 +127,7 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) using _TagType = oneapi::dpl::__par_backend_hetero::__parallel_find_forward_tag<_Range>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range>(__rng)); @@ -134,10 +137,10 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) // find_end //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_find_end(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Pred __pred) { //trivial pre-checks if (__rng1.empty() || __rng2.empty() || __rng1.size() < __rng2.size()) @@ -145,8 +148,8 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2 if (__rng1.size() == __rng2.size()) { - const bool __res = __pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __rng1, - ::std::forward<_Range2>(__rng2), __pred); + const bool __res = __ranges::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __rng1, + ::std::forward<_Range2>(__rng2), __pred); return __res ? 0 : __rng1.size(); } @@ -154,6 +157,7 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2 using _TagType = __par_backend_hetero::__parallel_find_backward_tag<_Range1>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)); @@ -163,10 +167,10 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2 // find_first_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_find_first_of(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_find_first_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Pred __pred) { //trivial pre-checks if (__rng1.empty() || __rng2.empty()) @@ -177,6 +181,7 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& _ //TODO: To check whether it makes sense to iterate over the second sequence in case of __rng1.size() < __rng2.size() return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)); @@ -186,15 +191,16 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& _ // any_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) +template +bool +__pattern_any_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) { if (__rng.empty()) return false; using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, _Pred>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, __par_backend_hetero::make_wrapped_policy( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, oneapi::dpl::__par_backend_hetero::__parallel_or_tag{}, ::std::forward<_Range>(__rng)); @@ -209,10 +215,10 @@ class equal_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Pred __pred) { //trivial pre-checks if (__rng2.empty()) @@ -222,8 +228,8 @@ __pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, if (__rng1.size() == __rng2.size()) { - const bool __res = __pattern_equal( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + const bool __res = __ranges::__pattern_equal( + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __rng1, ::std::forward<_Range2>(__rng2), __pred); return __res ? 0 : __rng1.size(); } @@ -232,8 +238,9 @@ __pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, using _TagType = oneapi::dpl::__par_backend_hetero::__parallel_find_forward_tag<_Range1>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy - (::std::forward<_ExecutionPolicy>(__exec)), + _BackendTag{}, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy< + oneapi::dpl::__par_backend_hetero::__find_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)); } @@ -241,18 +248,19 @@ __pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, // search_n //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_search_n(_ExecutionPolicy&& __exec, _Range&& __rng, _Size __count, const _Tp& __value, - _BinaryPredicate __pred) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_search_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range&& __rng, _Size __count, + const _Tp& __value, _BinaryPredicate __pred) { //TODO: To consider definition a kind of special factory "multiple_view" (addition to standard "single_view"). //The factory "multiple_view" would generate a range of N identical values. auto __s_rng = oneapi::dpl::experimental::ranges::views::iota(0, __count) | oneapi::dpl::experimental::ranges::views::transform([__value](auto) { return __value; }); - return __pattern_search(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __s_rng, __pred); + return __ranges::__pattern_search(__tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), + __s_rng, __pred); } template @@ -273,11 +281,11 @@ return_value(_Size __res, _Size __size, ::std::false_type) // adjacent_find //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __predicate, - _OrFirstTag __is__or_semantic) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_adjacent_find(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, + _BinaryPredicate __predicate, _OrFirstTag __is__or_semantic) { if (__rng.size() < 2) return __rng.size(); @@ -290,21 +298,21 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredic auto __rng1 = __rng | oneapi::dpl::experimental::ranges::views::take(__rng.size() - 1); auto __rng2 = __rng | oneapi::dpl::experimental::ranges::views::drop(1); - // TODO: in case of confilicting names + // TODO: in case of conflicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() auto result = oneapi::dpl::__par_backend_hetero::__parallel_find_or( - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, - _TagType{}, oneapi::dpl::__ranges::zip_view(__rng1, __rng2)); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, _TagType{}, + oneapi::dpl::__ranges::zip_view(__rng1, __rng2)); // inverted conditional because of // reorder_predicate in glue_algorithm_impl.h return return_value(result, __rng.size(), __is__or_semantic); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_count(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __predicate) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_count(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __predicate) { if (__rng.size() == 0) return 0; @@ -320,7 +328,7 @@ __pattern_count(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __predicat return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value ::std::forward<_Range>(__rng)) .get(); @@ -330,11 +338,11 @@ __pattern_count(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __predicat // copy_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_scan_copy(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _CreateMaskOp __create_mask_op, - _CopyByMaskOp __copy_by_mask_op) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) { if (__rng1.size() == 0) return __rng1.size(); @@ -356,7 +364,7 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng auto __res = __par_backend_hetero::__parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::zip_view( __rng1, oneapi::dpl::__ranges::all_view( __mask_buf.get_buffer())), @@ -376,11 +384,11 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng return __res; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_copy_if(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Predicate __pred, _Assign) +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Predicate __pred, _Assign) { using _SizeType = decltype(__rng1.size()); using _ReduceOp = ::std::plus<_SizeType>; @@ -388,18 +396,18 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, unseq_backend::__create_mask<_Predicate, _SizeType> __create_mask_op{__pred}; unseq_backend::__copy_by_mask<_ReduceOp, _Assign, /*inclusive*/ ::std::true_type, 1> __copy_by_mask_op; - return __pattern_scan_copy(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), __create_mask_op, __copy_by_mask_op); + return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), + __create_mask_op, __copy_by_mask_op); } //------------------------------------------------------------------------ // remove_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_remove_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred) { if (__rng.size() == 0) return __rng.size(); @@ -409,13 +417,14 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __rng.size()); auto __copy_rng = oneapi::dpl::__ranges::views::all(__buf.get_buffer()); - auto __copy_last_id = __pattern_copy_if(__exec, __rng, __copy_rng, __not_pred<_Predicate>{__pred}, - oneapi::dpl::__internal::__pstl_assign()); + auto __copy_last_id = __ranges::__pattern_copy_if(__tag, __exec, __rng, __copy_rng, __not_pred<_Predicate>{__pred}, + oneapi::dpl::__internal::__pstl_assign()); auto __copy_rng_truncated = __copy_rng | oneapi::dpl::experimental::ranges::views::take(__copy_last_id); - oneapi::dpl::__internal::__ranges::__pattern_walk_n(::std::forward<_ExecutionPolicy>(__exec), - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - __copy_rng_truncated, ::std::forward<_Range>(__rng)); + oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}, __copy_rng_truncated, + ::std::forward<_Range>(__rng)); return __copy_last_id; } @@ -424,11 +433,11 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred // unique_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_unique_copy(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _BinaryPredicate __pred, _Assign) +template +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, + _BinaryPredicate __pred, _Assign) { using _It1DifferenceType = oneapi::dpl::__internal::__difference_t<_Range1>; unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, _Assign, /*inclusive*/ ::std::true_type, 1> @@ -436,18 +445,18 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __re __create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{ __not_pred<_BinaryPredicate>{__pred}}; - return __pattern_scan_copy(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng), - ::std::forward<_Range2>(__result), __create_mask_op, __copy_by_mask_op); + return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range1>(__rng), ::std::forward<_Range2>(__result), + __create_mask_op, __copy_by_mask_op); } //------------------------------------------------------------------------ // unique //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_unique(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __pred) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __pred) { if (__rng.size() == 0) return __rng.size(); @@ -456,10 +465,12 @@ __pattern_unique(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __p oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __rng.size()); auto res_rng = oneapi::dpl::__ranges::views::all(__buf.get_buffer()); - auto res = __pattern_unique_copy(__exec, __rng, res_rng, __pred, oneapi::dpl::__internal::__pstl_assign()); + auto res = __ranges::__pattern_unique_copy(__tag, __exec, __rng, res_rng, __pred, + oneapi::dpl::__internal::__pstl_assign()); - __pattern_walk_n(::std::forward<_ExecutionPolicy>(__exec), __brick_copy<_ExecutionPolicy>{}, res_rng, - ::std::forward<_Range>(__rng)); + __ranges::__pattern_walk_n(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}, res_rng, + ::std::forward<_Range>(__rng)); return res; } @@ -477,10 +488,11 @@ class __copy2_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range3>> -__pattern_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) +template +oneapi::dpl::__internal::__difference_t<_Range3> +__pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Range3&& __rng3, _Compare __comp) { auto __n1 = __rng1.size(); auto __n2 = __rng2.size(); @@ -492,22 +504,24 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _ if (__n1 == 0) { oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy1_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3)); + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}, + ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3)); } else if (__n2 == 0) { oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy2_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::forward<_Range1>(__rng1), - ::std::forward<_Range3>(__rng3)); + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}, + ::std::forward<_Range1>(__rng1), ::std::forward<_Range3>(__rng3)); } else { - __par_backend_hetero::__parallel_merge(::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::__parallel_merge(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3), __comp) .wait(); @@ -520,12 +534,12 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _ // sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) +template +void +__pattern_sort(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) { if (__rng.size() >= 2) - __par_backend_hetero::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::__parallel_stable_sort(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __comp, __proj) .wait(); } @@ -534,10 +548,9 @@ __pattern_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj // min_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_min_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_min_element(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { //If size == 1, result is the zero-indexed element. If size == 0, result is 0. if (__rng.size() < 2) @@ -562,7 +575,7 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value ::std::forward<_Range>(__rng)) .get(); @@ -575,11 +588,9 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp // minmax_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy< - _ExecutionPolicy, - ::std::pair, oneapi::dpl::__internal::__difference_t<_Range>>> -__pattern_minmax_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) +template +::std::pair, oneapi::dpl::__internal::__difference_t<_Range>> +__pattern_minmax_element(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { //If size == 1, result is the zero-indexed element. If size == 0, result is 0. if (__rng.size() < 2) @@ -618,7 +629,7 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __c _ReduceValueType __ret = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value ::std::forward<_Range>(__rng)) .get(); @@ -661,12 +672,12 @@ class __assign_key2_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range3>> -__pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, - _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op) +template +oneapi::dpl::__internal::__difference_t<_Range3> +__pattern_reduce_by_segment(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __keys, + _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, + _BinaryPredicate __binary_pred, _BinaryOperator __binary_op) { // The algorithm reduces values in __values where the // associated keys for the values are equal to the adjacent key. @@ -684,15 +695,16 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 if (__n == 1) { - __brick_copy<_ExecutionPolicy> __copy_range{}; + __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy> __copy_range{}; oneapi::dpl::__internal::__ranges::__pattern_walk_n( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_keys_wrapper>(__exec), - __copy_range, ::std::forward<_Range1>(__keys), ::std::forward<_Range3>(__out_keys)); + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_keys_wrapper>(__exec), __copy_range, + ::std::forward<_Range1>(__keys), ::std::forward<_Range3>(__out_keys)); oneapi::dpl::__internal::__ranges::__pattern_walk_n( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_values_wrapper> - (::std::forward<_ExecutionPolicy>(__exec)), + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_values_wrapper>( + ::std::forward<_ExecutionPolicy>(__exec)), __copy_range, ::std::forward<_Range2>(__values), ::std::forward<_Range4>(__out_values)); return 1; @@ -730,8 +742,8 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 // evenly divisible by wg size (ensures segments are not long), or has a key not equal to the // adjacent element (marks end of real segments) // TODO: replace wgroup size with segment size based on platform specifics. - auto __intermediate_result_end = __pattern_copy_if( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__assign_key1_wrapper>(__exec), __view1, __view2, + auto __intermediate_result_end = __ranges::__pattern_copy_if( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__assign_key1_wrapper>(__exec), __view1, __view2, [__n, __binary_pred, __wgroup_size](const auto& __a) { // The size of key range for the (i-1) view is one less, so for the 0th index we do not check the keys // for (i-1), but we still need to get its key value as it is the start of a segment @@ -745,7 +757,7 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 //reduce by segment oneapi::dpl::__par_backend_hetero::__parallel_for( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__reduce1_wrapper>(__exec), + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__reduce1_wrapper>(__exec), unseq_backend::__brick_reduce_idx<_BinaryOperator, decltype(__n)>(__binary_op, __n), __intermediate_result_end, oneapi::dpl::__ranges::take_view_simple(experimental::ranges::views::all_read(__idx), __intermediate_result_end), @@ -773,8 +785,8 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 // element is copied if it is the 0th element (marks beginning of first segment), or has a key not equal to // the adjacent element (end of a segment). Artificial segments based on wg size are not created. - auto __result_end = __pattern_copy_if( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__assign_key2_wrapper>(__exec), __view3, __view4, + auto __result_end = __ranges::__pattern_copy_if( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__assign_key2_wrapper>(__exec), __view3, __view4, [__binary_pred](const auto& __a) { // The size of key range for the (i-1) view is one less, so for the 0th index we do not check the keys // for (i-1), but we still need to get its key value as it is the start of a segment @@ -786,6 +798,7 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 //reduce by segment oneapi::dpl::__par_backend_hetero::__parallel_for( + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__reduce2_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), unseq_backend::__brick_reduce_idx<_BinaryOperator, decltype(__intermediate_result_end)>( diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h index de875bee324..e1e06e17e96 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h @@ -18,6 +18,7 @@ #include "../../onedpl_config.h" #include "../../execution_defs.h" +#include "../../iterator_defs.h" #include "sycl_defs.h" @@ -59,24 +60,6 @@ class device_policy return q; } - // For internal use only - static constexpr ::std::true_type - __allow_unsequenced() - { - return ::std::true_type{}; - } - // __allow_vector is needed for __is_vectorization_preferred - static constexpr ::std::true_type - __allow_vector() - { - return ::std::true_type{}; - } - static constexpr ::std::true_type - __allow_parallel() - { - return ::std::true_type{}; - } - private: sycl::queue q; }; @@ -98,7 +81,7 @@ class fpga_policy : public device_policy # else __dpl_sycl::__fpga_selector() # endif // _ONEDPL_FPGA_EMU - )) + )) { } @@ -106,14 +89,6 @@ class fpga_policy : public device_policy fpga_policy(const fpga_policy& other) : base(other.queue()){}; explicit fpga_policy(sycl::queue q) : base(q) {} explicit fpga_policy(sycl::device d) : base(d) {} - - // For internal use only - - const base& - __device_policy() const - { - return static_cast(*this); - }; }; #endif // _ONEDPL_FPGA_DEVICE @@ -311,6 +286,66 @@ using __enable_if_device_execution_policy_double_no_default = __is_convertible_to_event<_Events...>, _T>; +template +struct __hetero_tag +{ + using __backend_tag = _BackendTag; +}; + +struct __device_backend_tag +{ +}; + +//---------------------------------------------------------- +// __select_backend (for the hetero policies) +//---------------------------------------------------------- + +template +__hetero_tag<__device_backend_tag> +__select_backend(const execution::device_policy<_KernelName>&, _IteratorTypes&&...) +{ + static_assert(__is_random_access_iterator_v<_IteratorTypes...>); + return {}; +} + +#if _ONEDPL_FPGA_DEVICE +struct __fpga_backend_tag : __device_backend_tag +{ +}; + +template +__hetero_tag<__fpga_backend_tag> +__select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _IteratorTypes&&...) +{ + static_assert(__is_random_access_iterator_v<_IteratorTypes...>); + return {}; +} +#endif + +//---------------------------------------------------------- +// __is_hetero_backend_tag, __is_hetero_backend_tag_v +//---------------------------------------------------------- + +template +struct __is_hetero_backend_tag : ::std::false_type +{ +}; + +template <> +struct __is_hetero_backend_tag<__device_backend_tag> : ::std::true_type +{ +}; + +#if _ONEDPL_FPGA_DEVICE +template <> +struct __is_hetero_backend_tag<__fpga_backend_tag> : ::std::true_type +{ +}; +#endif + +template +inline constexpr bool __is_hetero_backend_tag_v = __is_hetero_backend_tag<_BackendTag>::value; + } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index e0152a4006a..7cda428e542 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -40,6 +40,8 @@ # include "parallel_backend_sycl_radix_sort.h" #endif +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + namespace oneapi { namespace dpl @@ -244,10 +246,10 @@ struct __parallel_for_submitter<__internal::__optional_kernel_name<_Name...>> //General version of parallel_for, one additional parameter - __count of iterations of loop __cgh.parallel_for, //for some algorithms happens that size of processing range is n, but amount of iterations is n/2. -template = 0, typename... _Ranges> +template auto -__parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) +__parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, + _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _ForKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<_CustomName>; @@ -659,9 +661,10 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W template auto -__parallel_transform_scan_single_group(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, - ::std::size_t __n, _UnaryOperation __unary_op, _InitType __init, - _BinaryOperation __binary_op, _Inclusive) +__parallel_transform_scan_single_group(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + _InRng&& __in_rng, _OutRng&& __out_rng, ::std::size_t __n, + _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, + _Inclusive) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -735,12 +738,11 @@ __parallel_transform_scan_single_group(_ExecutionPolicy&& __exec, _InRng&& __in_ } template = 0> + typename _LocalScan, typename _GroupScan, typename _GlobalScan> auto -__parallel_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2&& __out_rng, - _BinaryOperation __binary_op, _InitType __init, _LocalScan __local_scan, - _GroupScan __group_scan, _GlobalScan __global_scan) +__parallel_transform_scan_base(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + _Range1&& __in_rng, _Range2&& __out_rng, _BinaryOperation __binary_op, _InitType __init, + _LocalScan __local_scan, _GroupScan __group_scan, _GlobalScan __global_scan) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -753,11 +755,11 @@ __parallel_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _R } template = 0> + typename _BinaryOperation, typename _Inclusive> auto -__parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, - _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) +__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, _UnaryOperation __unary_op, + _InitType __init, _BinaryOperation __binary_op, _Inclusive) { using _Type = typename _InitType::__value_type; @@ -779,7 +781,7 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2 if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size) { return __parallel_transform_scan_single_group( - std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), + __backend_tag, std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); } } @@ -796,7 +798,7 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2 return __future( __parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __binary_op, __init, // local scan unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner, @@ -852,11 +854,11 @@ struct __invoke_single_group_copy_if }; template = 0> + typename _CopyByMaskOp> auto -__parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) +__parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op, + _CopyByMaskOp __copy_by_mask_op) { using _ReduceOp = ::std::plus<_Size>; using _Assigner = unseq_backend::__scan_assigner; @@ -874,7 +876,7 @@ __parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __o oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n); return __parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::make_zip_view( ::std::forward<_InRng>(__in_rng), oneapi::dpl::__ranges::all_view( @@ -892,10 +894,10 @@ __parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __o __copy_by_mask_op); } -template = 0> +template auto -__parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) +__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) { using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>; @@ -930,8 +932,9 @@ __parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out using CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, /*inclusive*/ ::std::true_type, 1>; - return __parallel_scan_copy(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_InRng>(__in_rng), - ::std::forward<_OutRng>(__out_rng), __n, CreateOp{__pred}, CopyOp{}); + return __parallel_scan_copy(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, + CreateOp{__pred}, CopyOp{}); } } @@ -1071,12 +1074,11 @@ struct __early_exit_find_or // Base pattern for __parallel_or and __parallel_find. The execution depends on tag type _BrickTag. template -oneapi::dpl::__internal::__enable_if_device_execution_policy< - _ExecutionPolicy, - ::std::conditional_t<::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, - oneapi::dpl::__internal::__difference_t< - typename oneapi::dpl::__ranges::__get_first_range_type<_Ranges...>::type>>> -__parallel_find_or(_ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, _Ranges&&... __rngs) +::std::conditional_t< + ::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, + oneapi::dpl::__internal::__difference_t::type>> +__parallel_find_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Brick __f, + _BrickTag __brick_tag, _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _AtomicType = typename _BrickTag::_AtomicType; @@ -1182,9 +1184,9 @@ class __or_policy_wrapper }; template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f) +bool +__parallel_or(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); auto __buf = __keep(__first, __last); @@ -1192,6 +1194,7 @@ __parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, auto __s_buf = __s_keep(__s_first, __s_last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag, __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, __parallel_or_tag{}, __buf.all_view(), __s_buf.all_view()); } @@ -1200,13 +1203,15 @@ __parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, // TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of // backend code. template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f) +bool +__parallel_or(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _Brick __f) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag, __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, __parallel_or_tag{}, __buf.all_view()); } @@ -1221,9 +1226,9 @@ class __find_policy_wrapper }; template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, _Iterator1> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f, _IsFirst) +_Iterator1 +__parallel_find(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f, _IsFirst) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); auto __buf = __keep(__first, __last); @@ -1233,6 +1238,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, __parallel_find_backward_tag>; return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag, __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, _TagType{}, __buf.all_view(), __s_buf.all_view()); @@ -1242,8 +1248,9 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last // TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of // backend code. template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, _Iterator> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) +_Iterator +__parallel_find(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf = __keep(__first, __last); @@ -1251,6 +1258,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, __parallel_find_backward_tag>; return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag, __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, _TagType{}, __buf.all_view()); @@ -1482,10 +1490,10 @@ struct __parallel_merge_submitter<_IdType, __internal::__optional_kernel_name<_N template class __merge_kernel_name; -template = 0> +template auto -__parallel_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) +__parallel_merge(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, + _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -1493,19 +1501,20 @@ __parallel_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, if (__n <= std::numeric_limits<::std::uint32_t>::max()) { using _wi_index_type = ::std::uint32_t; - using _MergeKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__merge_kernel_name<_CustomName, _wi_index_type>>; - return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()(::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3), __comp); - + using _MergeKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider< + __merge_kernel_name<_CustomName, _wi_index_type>>; + return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()( + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), + ::std::forward<_Range3>(__rng3), __comp); } else { using _wi_index_type = ::std::uint64_t; - using _MergeKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__merge_kernel_name<_CustomName, _wi_index_type>>; - return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()(::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3), __comp); + using _MergeKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider< + __merge_kernel_name<_CustomName, _wi_index_type>>; + return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()( + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), + ::std::forward<_Range3>(__rng3), __comp); } } @@ -1544,9 +1553,9 @@ struct __parallel_sort_submitter<_IdType, __internal::__optional_kernel_name<_Le __internal::__optional_kernel_name<_GlobalSortName...>, __internal::__optional_kernel_name<_CopyBackName...>> { - template + template auto - operator()(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) const + operator()(_BackendTag, _ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) const { using _Tp = oneapi::dpl::__internal::__value_t<_Range>; using _Size = oneapi::dpl::__internal::__difference_t<_Range>; @@ -1636,10 +1645,10 @@ struct __parallel_sort_submitter<_IdType, __internal::__optional_kernel_name<_Le } }; -template = 0> +template auto -__parallel_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) +__parallel_sort_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, + _Compare __comp) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -1654,7 +1663,8 @@ __parallel_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) using _CopyBackKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__sort_copy_back_kernel<_CustomName, _wi_index_type>>; return __parallel_sort_submitter<_wi_index_type, _LeafSortKernel, _GlobalSortKernel, _CopyBackKernel>()( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __comp); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range>(__rng), __comp); } else { @@ -1666,7 +1676,8 @@ __parallel_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) using _CopyBackKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__sort_copy_back_kernel<_CustomName, _wi_index_type>>; return __parallel_sort_submitter<_wi_index_type, _LeafSortKernel, _GlobalSortKernel, _CopyBackKernel>()( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __comp); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range>(__rng), __comp); } } @@ -1678,9 +1689,9 @@ template struct __parallel_partial_sort_submitter<__internal::__optional_kernel_name<_GlobalSortName...>, __internal::__optional_kernel_name<_CopyBackName...>> { - template + template auto - operator()(_ExecutionPolicy&& __exec, _Range&& __rng, _Merge __merge, _Compare __comp) const + operator()(_BackendTag, _ExecutionPolicy&& __exec, _Range&& __rng, _Merge __merge, _Compare __comp) const { using _Tp = oneapi::dpl::__internal::__value_t<_Range>; using _Size = oneapi::dpl::__internal::__difference_t<_Range>; @@ -1743,10 +1754,10 @@ struct __parallel_partial_sort_submitter<__internal::__optional_kernel_name<_Glo } }; -template = 0> +template auto -__parallel_partial_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Merge __merge, _Compare __comp) +__parallel_partial_sort_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, + _Merge __merge, _Compare __comp) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _GlobalSortKernel = @@ -1755,7 +1766,8 @@ __parallel_partial_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Merge _ oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__sort_copy_back_kernel<_CustomName>>; return __parallel_partial_sort_submitter<_GlobalSortKernel, _CopyBackKernel>()( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __merge, __comp); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range>(__rng), __merge, __comp); } //------------------------------------------------------------------------ @@ -1776,31 +1788,32 @@ struct __is_radix_sort_usable_for_type }; #if _USE_RADIX_SORT -template > && - __is_radix_sort_usable_for_type, _Compare>::value, - int> = 0> +template < + typename _ExecutionPolicy, typename _Range, typename _Compare, typename _Proj, + ::std::enable_if_t< + __is_radix_sort_usable_for_type, _Compare>::value, int> = 0> auto -__parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare, _Proj __proj) +__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range&& __rng, _Compare, _Proj __proj) { return __parallel_radix_sort<__internal::__is_comp_ascending<::std::decay_t<_Compare>>::value>( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); } #endif -template > && - !__is_radix_sort_usable_for_type, _Compare>::value, - int> = 0> +template < + typename _ExecutionPolicy, typename _Range, typename _Compare, typename _Proj, + ::std::enable_if_t< + !__is_radix_sort_usable_for_type, _Compare>::value, int> = 0> auto -__parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) +__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range&& __rng, _Compare __comp, _Proj __proj) { auto __cmp_f = [__comp, __proj](const auto& __a, const auto& __b) mutable { return __comp(__proj(__a), __proj(__b)); }; - return __parallel_sort_impl(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __cmp_f); + return __parallel_sort_impl(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), + __cmp_f); } //------------------------------------------------------------------------ @@ -1810,21 +1823,19 @@ __parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __com // TODO: check if it makes sense to move these wrappers out of backend to a common place // TODO: consider changing __partial_merge_kernel to make it compatible with // __full_merge_kernel in order to use __parallel_sort_impl routine -template = 0> +template auto -__parallel_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, _Iterator __last, - _Compare __comp) +__parallel_partial_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator __first, _Iterator __mid, _Iterator __last, _Compare __comp) { const auto __mid_idx = __mid - __first; auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - return __parallel_partial_sort_impl(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), + return __parallel_partial_sort_impl(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __partial_merge_kernel{__mid_idx}, __comp); } - } // namespace __par_backend_hetero } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index b8410261c60..7baee78b1b1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -34,6 +34,8 @@ #include "../../iterator_impl.h" #include "sycl_iterator.h" +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + namespace oneapi { namespace dpl @@ -77,10 +79,10 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name... } }; -template = 0> +template auto -__parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) +__parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, + _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using __parallel_for_name = __internal::__kernel_name_provider<_CustomName>; @@ -89,201 +91,23 @@ __parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&& __count, std::forward<_Ranges>(__rngs)...); } -//------------------------------------------------------------------------ -// parallel_transform_reduce -//------------------------------------------------------------------------ - -template = 0, - typename... _Ranges> -auto -__parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, _Ranges&&... __rngs) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_Tp, _Commutative>( - __exec.__device_policy(), __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); -} - -//------------------------------------------------------------------------ -// parallel_transform_scan -//------------------------------------------------------------------------ - -template = 0> -auto -__parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, - _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( - __exec.__device_policy(), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __n, - __unary_op, __init, __binary_op, _Inclusive{}); -} - -template = 0> -auto -__parallel_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, - _BinaryOperation __binary_op, _InitType __init, _LocalScan __local_scan, - _GroupScan __group_scan, _GlobalScan __global_scan) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan_base( - __exec.__device_policy(), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), __binary_op, __init, - __local_scan, __group_scan, __global_scan); -} - -template = 0> -auto -__parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_copy_if( - __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, __pred); -} - -template = 0> -auto -__parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_scan_copy( - __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, - __create_mask_op, __copy_by_mask_op); -} - -//------------------------------------------------------------------------ -// __parallel_find_or -//----------------------------------------------------------------------- -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy< - _ExecutionPolicy, - ::std::conditional_t<::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, - oneapi::dpl::__internal::__difference_t< - typename oneapi::dpl::__ranges::__get_first_range_type<_Ranges...>::type>>> -__parallel_find_or(_ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, _Ranges&&... __rngs) -{ - return oneapi::dpl::__par_backend_hetero::__parallel_find_or(__exec.__device_policy(), __f, __brick_tag, - ::std::forward<_Ranges>(__rngs)...); -} - -//------------------------------------------------------------------------ -// parallel_or -//----------------------------------------------------------------------- -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_or(__exec.__device_policy(), __first, __last, __s_first, - __s_last, __f); -} - -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_or(__exec.__device_policy(), __first, __last, __f); -} - -//------------------------------------------------------------------------ -// parallel_find -//----------------------------------------------------------------------- - -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, _Iterator1> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f, _IsFirst __is_first) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_find(__exec.__device_policy(), __first, __last, __s_first, - __s_last, __f, __is_first); -} - -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, _Iterator> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst __is_first) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_find(__exec.__device_policy(), __first, __last, __f, - __is_first); -} - -//------------------------------------------------------------------------ -// parallel_merge -//----------------------------------------------------------------------- - -template -auto -__parallel_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) - -> oneapi::dpl::__internal::__enable_if_fpga_execution_policy< - _ExecutionPolicy, decltype(oneapi::dpl::__par_backend_hetero::__parallel_merge( - __exec.__device_policy(), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3), __comp))> -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_merge( - __exec.__device_policy(), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3), __comp); -} - -//------------------------------------------------------------------------ -// parallel_stable_sort -//----------------------------------------------------------------------- - -template = 0> -auto -__parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_stable_sort(__exec.__device_policy(), - ::std::forward<_Range>(__rng), __comp, __proj); -} - -//------------------------------------------------------------------------ -// parallel_partial_sort -//----------------------------------------------------------------------- - -// TODO: check if it makes sense to move these wrappers out of backend to a common place -template = 0> -auto -__parallel_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, _Iterator __last, - _Compare __comp) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_partial_sort(__exec.__device_policy(), __first, __mid, __last, - __comp); -} - //------------------------------------------------------------------------ // parallel_histogram //----------------------------------------------------------------------- // TODO: check if it makes sense to move these wrappers out of backend to a common place -template = 0> +template auto -__parallel_histogram(_ExecutionPolicy&& __exec, const _Event& __init_event, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) +__parallel_histogram(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, const _Event& __init_event, + _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { static_assert(sizeof(oneapi::dpl::__internal::__value_t<_Range2>) <= sizeof(::std::uint32_t), "histogram is not supported on FPGA devices with output types greater than 32 bits"); // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_histogram(__exec.__device_policy(), __init_event, - ::std::forward<_Range1>(__input), - ::std::forward<_Range2>(__bins), __binhash_manager); + return oneapi::dpl::__par_backend_hetero::__parallel_histogram( + oneapi::dpl::__internal::__device_backend_tag{}, __exec, __init_event, ::std::forward<_Range1>(__input), + ::std::forward<_Range2>(__bins), __binhash_manager); } } // namespace __par_backend_hetero diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index fdcf06ad984..ee864a53594 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -29,6 +29,8 @@ #include "../../histogram_binhash_utils.h" #include "../../utils.h" +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + namespace oneapi { namespace dpl @@ -285,9 +287,9 @@ struct __histogram_general_registers_local_reduction_submitter<__iters_per_work_ template <::std::uint16_t __iters_per_work_item, ::std::uint8_t __bins_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> auto -__histogram_general_registers_local_reduction(_ExecutionPolicy&& __exec, const sycl::event& __init_event, - ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) +__histogram_general_registers_local_reduction(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, ::std::uint16_t __work_group_size, + _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name; @@ -380,9 +382,9 @@ struct __histogram_general_local_atomics_submitter<__iters_per_work_item, template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> auto -__histogram_general_local_atomics(_ExecutionPolicy&& __exec, const sycl::event& __init_event, - ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) +__histogram_general_local_atomics(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, ::std::uint16_t __work_group_size, _Range1&& __input, + _Range2&& __bins, const _BinHashMgr& __binhash_manager) { using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name; @@ -405,11 +407,11 @@ struct __histogram_general_private_global_atomics_submitter; template struct __histogram_general_private_global_atomics_submitter<__internal::__optional_kernel_name<_KernelName...>> { - template + template auto - operator()(_ExecutionPolicy&& __exec, const sycl::event& __init_event, ::std::uint16_t __min_iters_per_work_item, - ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) + operator()(_BackendTag, _ExecutionPolicy&& __exec, const sycl::event& __init_event, + ::std::uint16_t __min_iters_per_work_item, ::std::uint16_t __work_group_size, _Range1&& __input, + _Range2&& __bins, const _BinHashMgr& __binhash_manager) { const ::std::size_t __n = __input.size(); const ::std::size_t __num_bins = __bins.size(); @@ -477,9 +479,10 @@ struct __histogram_general_private_global_atomics_submitter<__internal::__option }; template auto -__histogram_general_private_global_atomics(_ExecutionPolicy&& __exec, const sycl::event& __init_event, - ::std::uint16_t __min_iters_per_work_item, ::std::uint16_t __work_group_size, - _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) +__histogram_general_private_global_atomics(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, ::std::uint16_t __min_iters_per_work_item, + ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, + const _BinHashMgr& __binhash_manager) { using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name; @@ -487,14 +490,16 @@ __histogram_general_private_global_atomics(_ExecutionPolicy&& __exec, const sycl __histo_kernel_private_glocal_atomics<_kernel_base_name>>; return __histogram_general_private_global_atomics_submitter<_global_atomics_name>()( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __min_iters_per_work_item, __work_group_size, - ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __min_iters_per_work_item, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), + __binhash_manager); } template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> auto -__parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, +__parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag __backend_tag, + _ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { using _private_histogram_type = ::std::uint16_t; @@ -513,7 +518,7 @@ __parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& { return __future( __histogram_general_registers_local_reduction<__iters_per_work_item, __max_work_item_private_bins>( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } // if bins fit into SLM, use local atomics @@ -522,8 +527,8 @@ __parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& __local_mem_size) { return __future(__histogram_general_local_atomics<__iters_per_work_item>( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input), - ::std::forward<_Range2>(__bins), __binhash_manager)); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, + ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } else // otherwise, use global atomics (private copies per workgroup) { @@ -533,26 +538,27 @@ __parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& // private copies of the histogram bins in global memory. No unrolling is taken advantage of here because it // is a runtime argument. return __future(__histogram_general_private_global_atomics( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __iters_per_work_item, __work_group_size, - ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __iters_per_work_item, + __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } } template auto -__parallel_histogram(_ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, +__parallel_histogram(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { if (__input.size() < 1048576) // 2^20 { return __parallel_histogram_select_kernel( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } else { return __parallel_histogram_select_kernel( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index 04fa8d3f124..b46fb50c831 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -25,6 +25,8 @@ #include "parallel_backend_sycl_utils.h" #include "execution_sycl_defs.h" +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + #define _ONEDPL_RADIX_WORKLOAD_TUNING 1 //To achieve better performance, number of segments and work-group size are variated depending on a number of elements: //1. 32K...512K - number of segments is increased up to 8 times @@ -757,7 +759,8 @@ struct __parallel_radix_sort_iteration //----------------------------------------------------------------------- template auto -__parallel_radix_sort(_ExecutionPolicy&& __exec, _Range&& __in_rng, _Proj __proj) +__parallel_radix_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __in_rng, + _Proj __proj) { const ::std::size_t __n = __in_rng.size(); assert(__n > 1); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h index 6915f2b09e5..fbf80582d43 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h @@ -16,6 +16,8 @@ #ifndef _ONEDPL_parallel_backend_sycl_radix_sort_one_wg_H #define _ONEDPL_parallel_backend_sycl_radix_sort_one_wg_H +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + //The file is an internal file and the code of that file is included by a major file into the following namespaces: //namespace oneapi //{ diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 52b036ab4b3..a2c1bda6a35 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -26,6 +26,8 @@ #include "unseq_backend_sycl.h" #include "utils_ranges_sycl.h" +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + namespace oneapi { namespace dpl @@ -106,11 +108,10 @@ struct __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __ite __internal::__optional_kernel_name<_Name...>> { template = 0, typename... _Ranges> auto - operator()(_ExecutionPolicy&& __exec, const _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, _Ranges&&... __rngs) const + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, const _Size __n, + _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) const { auto __transform_pattern = unseq_backend::transform_reduce<_ExecutionPolicy, __iters_per_work_item, _ReduceOp, _TransformOp, @@ -126,7 +127,9 @@ struct __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __ite __cgh.parallel_for<_Name...>( sycl::nd_range<1>(sycl::range<1>(__work_group_size), sycl::range<1>(__work_group_size)), [=](sycl::nd_item<1> __item_id) { - auto __res_ptr = __res_acc.__get_pointer(); + auto __res_ptr = + __usm_host_or_buffer_storage<_ExecutionPolicy, _Tp>::__get_usm_host_or_buffer_accessor_ptr( + __res_acc); __work_group_reduce_kernel<_Tp>(__item_id, __n, __transform_pattern, __reduce_pattern, __init, __temp_local, __res_ptr, __rngs...); }); @@ -138,9 +141,10 @@ struct __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __ite template = 0, typename... _Ranges> + typename... _Ranges> auto -__parallel_transform_reduce_small_impl(_ExecutionPolicy&& __exec, const _Size __n, _ReduceOp __reduce_op, +__parallel_transform_reduce_small_impl(oneapi::dpl::__internal::__device_backend_tag __backend_tag, + _ExecutionPolicy&& __exec, const _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -148,9 +152,9 @@ __parallel_transform_reduce_small_impl(_ExecutionPolicy&& __exec, const _Size __ __reduce_small_kernel<::std::integral_constant<::std::uint8_t, __iters_per_work_item>, _CustomName>>; return __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __iters_per_work_item, _Commutative, - _ReduceKernel>()(::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + _ReduceKernel>()( + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } // Submits the first kernel of the parallel_transform_reduce for mid-sized arrays. @@ -166,11 +170,11 @@ struct __parallel_transform_reduce_device_kernel_submitter<_Tp, __work_group_siz __internal::__optional_kernel_name<_KernelName...>> { template = 0, typename... _Ranges> auto - operator()(_ExecutionPolicy&& __exec, _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, sycl::buffer<_Tp>& __temp, _Ranges&&... __rngs) const + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Size __n, + _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, sycl::buffer<_Tp>& __temp, + _Ranges&&... __rngs) const { auto __transform_pattern = unseq_backend::transform_reduce<_ExecutionPolicy, __iters_per_work_item, _ReduceOp, _TransformOp, @@ -207,11 +211,11 @@ template > { - template = 0> + template auto - operator()(_ExecutionPolicy&& __exec, sycl::event& __reduce_event, _Size __n, _ReduceOp __reduce_op, - _TransformOp __transform_op, _InitType __init, sycl::buffer<_Tp>& __temp) const + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, sycl::event& __reduce_event, + _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, + sycl::buffer<_Tp>& __temp) const { using _NoOpFunctor = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; auto __transform_pattern = @@ -243,7 +247,9 @@ struct __parallel_transform_reduce_work_group_kernel_submitter< __cgh.parallel_for<_KernelName...>( sycl::nd_range<1>(sycl::range<1>(__work_group_size2), sycl::range<1>(__work_group_size2)), [=](sycl::nd_item<1> __item_id) { - auto __res_ptr = __res_acc.__get_pointer(); + auto __res_ptr = + __usm_host_or_buffer_storage<_ExecutionPolicy, _Tp>::__get_usm_host_or_buffer_accessor_ptr( + __res_acc); __work_group_reduce_kernel<_Tp>(__item_id, __n, __transform_pattern, __reduce_pattern, __init, __temp_local, __res_ptr, __temp_acc); }); @@ -255,10 +261,10 @@ struct __parallel_transform_reduce_work_group_kernel_submitter< template = 0, typename... _Ranges> + typename _Size, typename _ReduceOp, typename _TransformOp, typename _InitType, typename... _Ranges> auto -__parallel_transform_reduce_mid_impl(_ExecutionPolicy&& __exec, _Size __n, _ReduceOp __reduce_op, +__parallel_transform_reduce_mid_impl(oneapi::dpl::__internal::__device_backend_tag __backend_tag, + _ExecutionPolicy&& __exec, _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -279,12 +285,14 @@ __parallel_transform_reduce_mid_impl(_ExecutionPolicy&& __exec, _Size __n, _Redu sycl::event __reduce_event = __parallel_transform_reduce_device_kernel_submitter<_Tp, __work_group_size, __iters_per_work_item_device_kernel, _Commutative, _ReduceDeviceKernel>()( - __exec, __n, __reduce_op, __transform_op, __init, __temp, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, __exec, __n, __reduce_op, __transform_op, __init, __temp, + ::std::forward<_Ranges>(__rngs)...); __n = __n_groups; // Number of preliminary results from the device kernel. return __parallel_transform_reduce_work_group_kernel_submitter< _Tp, __work_group_size, __iters_per_work_item_work_group_kernel, _Commutative, _ReduceWorkGroupKernel>()( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_event, __n, __reduce_op, __transform_op, __init, __temp); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __reduce_event, __n, __reduce_op, __transform_op, + __init, __temp); } // General implementation using a tree reduction @@ -292,11 +300,11 @@ template = 0, typename... _Ranges> static auto - submit(_ExecutionPolicy&& __exec, _Size __n, ::std::uint16_t __work_group_size, _ReduceOp __reduce_op, - _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) + submit(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Size __n, + ::std::uint16_t __work_group_size, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, + _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _NoOpFunctor = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; @@ -355,7 +363,9 @@ struct __parallel_transform_reduce_impl sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size), sycl::range<1>(__work_group_size)), [=](sycl::nd_item<1> __item_id) { - auto __res_ptr = __res_acc.__get_pointer(); + auto __res_ptr = + __usm_host_or_buffer_storage<_ExecutionPolicy, _Tp>::__get_usm_host_or_buffer_accessor_ptr( + __res_acc); auto __local_idx = __item_id.get_local_id(0); auto __group_idx = __item_id.get_group(0); // 1. Initialization (transform part). Fill local memory @@ -411,11 +421,10 @@ struct __parallel_transform_reduce_impl // Big arrays are processed with a recursive tree reduction. __work_group_size * __iters_per_work_item elements are // reduced in each step. template = 0, - typename... _Ranges> + typename _InitType, typename... _Ranges> auto -__parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, _Ranges&&... __rngs) +__parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) { auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); assert(__n > 0); @@ -431,37 +440,37 @@ __parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _T if (__n <= 256) { return __parallel_transform_reduce_small_impl<_Tp, 256, 1, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 512) { return __parallel_transform_reduce_small_impl<_Tp, 256, 2, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 1024) { return __parallel_transform_reduce_small_impl<_Tp, 256, 4, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 2048) { return __parallel_transform_reduce_small_impl<_Tp, 256, 8, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4096) { return __parallel_transform_reduce_small_impl<_Tp, 256, 16, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8192) { return __parallel_transform_reduce_small_impl<_Tp, 256, 32, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } @@ -471,44 +480,44 @@ __parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _T else if (__n <= 2097152) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 1, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4194304) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 2, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8388608) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 4, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 16777216) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 8, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 33554432) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 16, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 67108864) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 32, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } } // Otherwise use a recursive tree reduction. return __parallel_transform_reduce_impl<_Tp, 32, _Commutative>::submit( - ::std::forward<_ExecutionPolicy>(__exec), __n, __work_group_size, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __work_group_size, __reduce_op, __transform_op, + __init, ::std::forward<_Ranges>(__rngs)...); } } // namespace __par_backend_hetero diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h index 62025411b24..bbfd53662ad 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h @@ -386,8 +386,6 @@ class __buffer_impl __container_t __container; public: - static_assert(::std::is_same_v<_ExecutionPolicy, ::std::decay_t<_ExecutionPolicy>>); - __buffer_impl(_ExecutionPolicy /*__exec*/, ::std::size_t __n_elements) : __container{sycl::range<1>(__n_elements)} { } @@ -510,7 +508,7 @@ struct __usm_host_or_buffer_storage __use_USM_host_allocations(sycl::queue __queue) { // A buffer is used by default. Supporting compilers use the unified future on top of USM host memory or a buffer. -#if _ONEDPL_SYCL_USM_HOST_PRESENT +#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT auto __device = __queue.get_device(); if (!__device.is_gpu()) return false; @@ -540,11 +538,26 @@ struct __usm_host_or_buffer_storage } } + template + static auto + __get_usm_host_or_buffer_accessor_ptr(const _Acc& __acc) + { +#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT + return __acc.__get_pointer(); +#else + return &__acc[0]; +#endif + } + auto __get_acc(sycl::handler& __cgh) { +#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT return __usm ? __usm_host_or_buffer_accessor<_T>(__cgh, __usm_buf.get()) : __usm_host_or_buffer_accessor<_T>(__cgh, __sycl_buf.get()); +#else + return sycl::accessor(*__sycl_buf.get(), __cgh, sycl::read_write, __dpl_sycl::__no_init{}); +#endif } _T diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index d864337087c..a4c470502fe 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -82,12 +82,12 @@ # define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE_IF_SUPPORTED(SIZE) #endif -// The unified future supporting USM host memory and buffers is only supported after DPCPP 2023.1 +// The unified future supporting USM memory and buffers is only supported after DPCPP 2023.1 // but not by 2023.2. #if (_ONEDPL_LIBSYCL_VERSION >= 60100 && _ONEDPL_LIBSYCL_VERSION != 60200) -# define _ONEDPL_SYCL_USM_HOST_PRESENT 1 +# define _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT 1 #else -# define _ONEDPL_SYCL_USM_HOST_PRESENT 0 +# define _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT 0 #endif namespace __dpl_sycl diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h index 05047f61d77..27bfb0d4a88 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h @@ -149,6 +149,34 @@ struct _ModeConverter static constexpr access_mode __value = access_mode::discard_write; }; +template ::value_type> +using __default_alloc_vec_iter = typename std::vector::iterator; + +template ::value_type> +using __usm_shared_alloc_vec_iter = + typename std::vector>::iterator; + +template ::value_type> +using __usm_host_alloc_vec_iter = + typename std::vector>::iterator; + +// Evaluates to true if the provided type is an iterator with a value_type and if the implementation of a +// std::vector::iterator can be distinguished between three different allocators, the +// default, usm_shared, and usm_host. If all are distinct, it is very unlikely any non-usm based allocator +// could be confused with a usm allocator. +template +struct __vector_iter_distinguishes_by_allocator : std::false_type +{ +}; +template +struct __vector_iter_distinguishes_by_allocator< + Iter, std::enable_if_t, __usm_shared_alloc_vec_iter> && + !std::is_same_v<__default_alloc_vec_iter, __usm_host_alloc_vec_iter> && + !std::is_same_v<__usm_host_alloc_vec_iter, __usm_shared_alloc_vec_iter>>> + : std::true_type +{ +}; + } // namespace __internal template diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h new file mode 100644 index 00000000000..7003dffc96a --- /dev/null +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -0,0 +1,151 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Copyright (C) Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file incorporates work covered by the following copyright and permission +// notice: +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +// This file contains some specialization SYCL traits for some oneDPL types. +// +// Fancy iterators and internal functors which are device copyable when their +// template arguments are also device copyable should be explicitly specialized +// as such. This is important when template argument member variables may be +// device copyable but not trivially copyable. +// Include this header before a kernel submit SYCL code. + +#ifndef _ONEDPL_SYCL_TRAITS_H +#define _ONEDPL_SYCL_TRAITS_H + +#if __INTEL_LLVM_COMPILER && (__INTEL_LLVM_COMPILER < 20240100) + +# define _ONEDPL_DEVICE_COPYABLE(TYPE) \ + template \ + struct sycl::is_device_copyable, ::std::enable_if_t>>> \ + : ::std::conjunction...> \ + { \ + }; + +#else + +# define _ONEDPL_DEVICE_COPYABLE(TYPE) \ + template \ + struct sycl::is_device_copyable> : ::std::conjunction...> \ + { \ + }; + +#endif + +using namespace oneapi::dpl::__internal; + +_ONEDPL_DEVICE_COPYABLE(__not_pred) +_ONEDPL_DEVICE_COPYABLE(__reorder_pred) +_ONEDPL_DEVICE_COPYABLE(__equal_value_by_pred) +_ONEDPL_DEVICE_COPYABLE(__equal_value) +_ONEDPL_DEVICE_COPYABLE(__not_equal_value) +_ONEDPL_DEVICE_COPYABLE(__transform_functor) +_ONEDPL_DEVICE_COPYABLE(__transform_if_unary_functor) +_ONEDPL_DEVICE_COPYABLE(__transform_if_binary_functor) +_ONEDPL_DEVICE_COPYABLE(__replace_functor) +_ONEDPL_DEVICE_COPYABLE(__replace_copy_functor) + +template +struct fill_functor; + +template +struct generate_functor; + +template +struct equal_predicate; + +template +struct __search_n_unary_predicate; + +template +struct adjacent_find_fn; + +template +struct __is_heap_check; + +template +struct __create_mask_unique_copy; + +_ONEDPL_DEVICE_COPYABLE(fill_functor) +_ONEDPL_DEVICE_COPYABLE(generate_functor) +_ONEDPL_DEVICE_COPYABLE(__brick_fill) +_ONEDPL_DEVICE_COPYABLE(__brick_fill_n) +_ONEDPL_DEVICE_COPYABLE(__search_n_unary_predicate) +_ONEDPL_DEVICE_COPYABLE(__is_heap_check) +_ONEDPL_DEVICE_COPYABLE(equal_predicate) +_ONEDPL_DEVICE_COPYABLE(adjacent_find_fn) +_ONEDPL_DEVICE_COPYABLE(__create_mask_unique_copy) + +using namespace oneapi::dpl::__par_backend_hetero; + +template +struct __early_exit_find_or; + +_ONEDPL_DEVICE_COPYABLE(__early_exit_find_or); + +using namespace oneapi::dpl::unseq_backend; + +_ONEDPL_DEVICE_COPYABLE(walk_n) +_ONEDPL_DEVICE_COPYABLE(walk_adjacent_difference) +_ONEDPL_DEVICE_COPYABLE(transform_reduce) +_ONEDPL_DEVICE_COPYABLE(reduce_over_group) +_ONEDPL_DEVICE_COPYABLE(single_match_pred_by_idx) +_ONEDPL_DEVICE_COPYABLE(multiple_match_pred) +_ONEDPL_DEVICE_COPYABLE(n_elem_match_pred) +_ONEDPL_DEVICE_COPYABLE(first_match_pred) +_ONEDPL_DEVICE_COPYABLE(__create_mask) +_ONEDPL_DEVICE_COPYABLE(__copy_by_mask) +_ONEDPL_DEVICE_COPYABLE(__partition_by_mask) +_ONEDPL_DEVICE_COPYABLE(__global_scan_functor) +_ONEDPL_DEVICE_COPYABLE(__scan) +_ONEDPL_DEVICE_COPYABLE(__brick_includes) +_ONEDPL_DEVICE_COPYABLE(__brick_set_op) +_ONEDPL_DEVICE_COPYABLE(__brick_reduce_idx) + +using namespace oneapi::dpl::internal; + +template +struct custom_brick; + +template +struct replace_if_fun; + +template +class transform_if_stencil_fun; + +template +struct segmented_scan_fun; + +template +class scatter_and_accumulate_fun; + +template +struct scan_by_key_fun; + +_ONEDPL_DEVICE_COPYABLE(custom_brick) +_ONEDPL_DEVICE_COPYABLE(replace_if_fun) +_ONEDPL_DEVICE_COPYABLE(scan_by_key_fun) +_ONEDPL_DEVICE_COPYABLE(segmented_scan_fun) +_ONEDPL_DEVICE_COPYABLE(scatter_and_accumulate_fun) +_ONEDPL_DEVICE_COPYABLE(transform_if_stencil_fun) + +using namespace oneapi::dpl; + +_ONEDPL_DEVICE_COPYABLE(zip_iterator) +_ONEDPL_DEVICE_COPYABLE(transform_iterator) +_ONEDPL_DEVICE_COPYABLE(permutation_iterator) + +#undef _ONEDPL_DEVICE_COPYABLE + +#endif // _ONEDPL_SYCL_TRAITS_H diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h index ea0bbcc0010..9351b20dc88 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h @@ -22,6 +22,7 @@ #include "../../utils_ranges.h" #include "../../iterator_impl.h" #include "../../glue_numeric_defs.h" +#include "sycl_iterator.h" #include "sycl_defs.h" namespace oneapi @@ -206,6 +207,16 @@ struct is_passed_directly +struct is_passed_directly< + Iter, std::enable_if_t<(std::is_same_v> || + std::is_same_v>) && + oneapi::dpl::__internal::__vector_iter_distinguishes_by_allocator::value>> : + std::true_type +{ +}; + template struct is_passed_directly> : ::std::true_type { @@ -716,6 +727,32 @@ struct __get_sycl_range } }; +//---------------------------------------------------------- +// __select_backend (for the hetero policies) +//---------------------------------------------------------- + +//TODO required correct implementation of this __ranges::__select_backend() +// 1. There is still not RA ranges checks +// 2. Obviously, a return tag is not necessarily oneapi::dpl::__internal::__hetero_tag +template +oneapi::dpl::__internal::__hetero_tag +__select_backend(const execution::device_policy<_KernelName>&, _Ranges&&...) +{ + return {}; +} + +#if _ONEDPL_FPGA_DEVICE +//TODO required correct implementation of this __ranges::__select_backend() +// 1. There is still not RA ranges checks +// 2. Obviously, a return tag is not necessarily oneapi::dpl::__internal::__hetero_tag +template +oneapi::dpl::__internal::__hetero_tag +__select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _Ranges&&...) +{ + return {}; +} +#endif + } // namespace __ranges } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h index 27179622b6b..87d22e9a0a7 100644 --- a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h @@ -117,11 +117,12 @@ struct __hist_fill_zeros_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_histogram(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _Size __num_bins, _BinHash&& __func, _RandomAccessIterator2 __histogram_first) +template +void +__pattern_histogram(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _Size __num_bins, _BinHash&& __func, + _RandomAccessIterator2 __histogram_first) { //If there are no histogram bins there is nothing to do if (__num_bins > 0) @@ -143,7 +144,7 @@ __pattern_histogram(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _ //fill histogram bins with zeros auto __init_event = oneapi::dpl::__par_backend_hetero::__parallel_for( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__hist_fill_zeros_wrapper>(__exec), + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__hist_fill_zeros_wrapper>(__exec), unseq_backend::walk_n<_ExecutionPolicy, decltype(__fill_func)>{__fill_func}, __num_bins, __bins); if (__n > 0) @@ -156,8 +157,8 @@ __pattern_histogram(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _ _RandomAccessIterator1>(); auto __input_buf = __keep_input(__first, __last); - __parallel_histogram(::std::forward<_ExecutionPolicy>(__exec), __init_event, __input_buf.all_view(), - ::std::move(__bins), __binhash_manager) + __parallel_histogram(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __input_buf.all_view(), ::std::move(__bins), __binhash_manager) .wait(); } else diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 60c1001f5b8..ff7d9581e42 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -37,12 +37,12 @@ namespace __internal // transform_reduce (version with two binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, - _BinaryOperation2 __binary_op2, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, + _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { if (__first1 == __last1) return __init; @@ -60,7 +60,7 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf1.all_view(), __buf2.all_view()) .get(); @@ -70,12 +70,12 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f // transform_reduce (with unary and binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, - _BinaryOperation __binary_op, _UnaryOperation __unary_op, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) { if (__first == __last) return __init; @@ -88,7 +88,7 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf.all_view()) .get(); @@ -122,11 +122,12 @@ __iterators_possibly_equal(const sycl_iterator<_Mode1, _T, _Allocator>& __it1, } #endif // _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) +template +_Iterator2 +__pattern_transform_scan_base(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _InitType __init, + _BinaryOperation __binary_op, _Inclusive) { if (__first == __last) return __result; @@ -143,9 +144,9 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _It auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); auto __buf2 = __keep2(__result, __result + __n); - oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(::std::forward<_ExecutionPolicy>(__exec), - __buf1.all_view(), __buf2.all_view(), __n, - __unary_op, __init, __binary_op, _Inclusive{}) + oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, + __unary_op, __init, __binary_op, _Inclusive{}) .wait(); } else @@ -168,48 +169,48 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _It auto __buf2 = __keep2(__first_tmp, __last_tmp); // Run main algorithm and save data into temporary buffer - oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(__policy, __buf1.all_view(), __buf2.all_view(), - __n, __unary_op, __init, __binary_op, _Inclusive{}) + oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(_BackendTag{}, __policy, __buf1.all_view(), + __buf2.all_view(), __n, __unary_op, __init, + __binary_op, _Inclusive{}) .wait(); // Move data from temporary buffer into results - oneapi::dpl::__internal::__pattern_walk2_brick(::std::move(__policy), __first_tmp, __last_tmp, __result, - oneapi::dpl::__internal::__brick_move<_NewExecutionPolicy>{}, - ::std::true_type{}); + oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, ::std::move(__policy), __first_tmp, __last_tmp, __result, + oneapi::dpl::__internal::__brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer } return __result + __n; } - -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator2 +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _Type __init, + _BinaryOperation __binary_op, _Inclusive) { using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + return __pattern_transform_scan_base(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); } // scan without initial element -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator2 +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, + _BinaryOperation __binary_op, _Inclusive) { using _Type = typename ::std::iterator_traits<_Iterator1>::value_type; using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__no_init_value<_RepackedType>; - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + return __pattern_transform_scan_base(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, _InitType{}, __binary_op, _Inclusive{}); } @@ -223,11 +224,11 @@ struct adjacent_difference_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, - _ForwardIterator2 __d_first, _BinaryOperation __op, /*vector*/ ::std::true_type, - /*parallel*/ ::std::true_type) +template +_ForwardIterator2 +__pattern_adjacent_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first, + _ForwardIterator1 __last, _ForwardIterator2 __d_first, _BinaryOperation __op) { auto __n = __last - __first; if (__n <= 0) @@ -242,41 +243,34 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __fir // if we have the only element, just copy it according to the specification if (__n == 1) { - return __internal::__except_handler([&__exec, __first, __last, __d_first, __d_last, &__op]() { - auto __wrapped_policy = __par_backend_hetero::make_wrapped_policy( - ::std::forward<_ExecutionPolicy>(__exec)); - - __internal::__pattern_walk2_brick(__wrapped_policy, __first, __last, __d_first, - __internal::__brick_copy{}, - ::std::true_type{}); + auto __wrapped_policy = __par_backend_hetero::make_wrapped_policy( + ::std::forward<_ExecutionPolicy>(__exec)); - return __d_last; - }); + __internal::__pattern_walk2_brick(__tag, __wrapped_policy, __first, __last, __d_first, + __internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } else #endif { - return __internal::__except_handler([&__exec, __first, __last, __d_first, __d_last, &__op, __n]() { - auto __fn = [__op](_It1ValueT __in1, _It1ValueT __in2, _It2ValueTRef __out1) { - __out1 = __op(__in2, __in1); // This move assignment is allowed by the C++ standard draft N4810 - }; - - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); - auto __buf1 = __keep1(__first, __last); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator2>(); - auto __buf2 = __keep2(__d_first, __d_last); - - using _Function = unseq_backend::walk_adjacent_difference<_ExecutionPolicy, decltype(__fn)>; - - oneapi::dpl::__par_backend_hetero::__parallel_for(__exec, _Function{__fn}, __n, __buf1.all_view(), - __buf2.all_view()) - .wait(); - - return __d_last; - }); + auto __fn = [__op](_It1ValueT __in1, _It1ValueT __in2, _It2ValueTRef __out1) { + __out1 = __op(__in2, __in1); // This move assignment is allowed by the C++ standard draft N4810 + }; + + auto __keep1 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); + auto __buf1 = __keep1(__first, __last); + auto __keep2 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator2>(); + auto __buf2 = __keep2(__d_first, __d_last); + + using _Function = unseq_backend::walk_adjacent_difference<_ExecutionPolicy, decltype(__fn)>; + + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, __exec, _Function{__fn}, __n, + __buf1.all_view(), __buf2.all_view()) + .wait(); } + + return __d_last; } } // namespace __internal diff --git a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h index 0b104a5ff4b..969b05ab914 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h @@ -37,11 +37,11 @@ namespace __ranges // transform_reduce (version with two binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Tp __init, - _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { if (__rng1.empty()) return __init; @@ -51,7 +51,7 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2& return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)) .get(); @@ -61,10 +61,11 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2& // transform_reduce (with unary and binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init, _BinaryOperation __binary_op, - _UnaryOperation __unary_op) +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init, + _BinaryOperation __binary_op, _UnaryOperation __unary_op) { if (__rng.empty()) return __init; @@ -74,7 +75,7 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value ::std::forward<_Range>(__rng)) .get(); @@ -84,12 +85,11 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init // transform_scan //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _UnaryOperation __unary_op, - _InitType __init, _BinaryOperation __binary_op, _Inclusive) +template +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) { if (__rng1.empty()) return 0; @@ -106,8 +106,8 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Rang _NoOpFunctor __get_data_op; oneapi::dpl::__par_backend_hetero::__parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - __binary_op, __init, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), + ::std::forward<_Range2>(__rng2), __binary_op, __init, // local scan unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner, _NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op, __assign_op, @@ -122,36 +122,34 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Rang return __rng1_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _UnaryOperation __unary_op, - _Type __init, _BinaryOperation __binary_op, _Inclusive) +template +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive) { using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), __unary_op, _InitType{__init}, __binary_op, - _Inclusive{}); + return __pattern_transform_scan_base(__tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), __unary_op, + _InitType{__init}, __binary_op, _Inclusive{}); } // scan without initial element -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _UnaryOperation __unary_op, - _BinaryOperation __binary_op, _Inclusive) +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive) { using _Type = oneapi::dpl::__internal::__value_t<_Range1>; using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__no_init_value<_RepackedType>; - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), __unary_op, _InitType{}, __binary_op, - _Inclusive{}); + return __pattern_transform_scan_base(__tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), __unary_op, + _InitType{}, __binary_op, _Inclusive{}); } } // namespace __ranges diff --git a/include/oneapi/dpl/pstl/histogram_impl.h b/include/oneapi/dpl/pstl/histogram_impl.h index 4c8f5204793..362685bd19c 100644 --- a/include/oneapi/dpl/pstl/histogram_impl.h +++ b/include/oneapi/dpl/pstl/histogram_impl.h @@ -32,12 +32,14 @@ namespace dpl namespace __internal { -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_histogram(_ExecutionPolicy&& exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, +void +__pattern_histogram(_Tag, _ExecutionPolicy&& exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _Size __num_bins, _IdxHashFunc __func, _RandomAccessIterator2 __histogram_first) { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + static_assert(sizeof(_Size) == 0 /*false*/, "Histogram API is currently unsupported for policies other than device execution policies"); } @@ -50,8 +52,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomA histogram(_ExecutionPolicy&& exec, _RandomAccessIterator1 first, _RandomAccessIterator1 last, _Size num_bins, _ValueType first_bin_min_val, _ValueType last_bin_max_val, _RandomAccessIterator2 histogram_first) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(exec, first, histogram_first); + oneapi::dpl::__internal::__pattern_histogram( - ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, oneapi::dpl::__internal::__evenly_divided_binhash<_ValueType>(first_bin_min_val, last_bin_max_val, num_bins), histogram_first); return histogram_first + num_bins; @@ -64,9 +68,11 @@ histogram(_ExecutionPolicy&& exec, _RandomAccessIterator1 first, _RandomAccessIt _RandomAccessIterator2 boundary_first, _RandomAccessIterator2 boundary_last, _RandomAccessIterator3 histogram_first) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(exec, first, boundary_first, histogram_first); + ::std::ptrdiff_t num_bins = boundary_last - boundary_first - 1; oneapi::dpl::__internal::__pattern_histogram( - ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, oneapi::dpl::__internal::__custom_boundary_binhash{boundary_first, boundary_last}, histogram_first); return histogram_first + num_bins; } diff --git a/include/oneapi/dpl/pstl/iterator_defs.h b/include/oneapi/dpl/pstl/iterator_defs.h index ad778064493..1b85d74fc40 100644 --- a/include/oneapi/dpl/pstl/iterator_defs.h +++ b/include/oneapi/dpl/pstl/iterator_defs.h @@ -27,61 +27,33 @@ namespace dpl namespace __internal { -// Internal wrapper around ::std::iterator_traits as it is required to be -// SFINAE-friendly(not produce "hard" error when _Ip is not an iterator) -// only starting with C++17. Although many standard library implementations -// provide it for older versions, we cannot rely on that. -template -struct __iterator_traits -{ -}; - -template -struct __iterator_traits<_Ip, - ::std::void_t> - : ::std::iterator_traits<_Ip> -{ -}; - -// Handles _Tp* and const _Tp* specializations -template -struct __iterator_traits<_Tp*, void> : ::std::iterator_traits<_Tp*> -{ -}; - -// Make is_random_access_iterator not to fail with a 'hard' error when it's used in SFINAE with -// a non-iterator type by providing a default value. -template -struct __is_random_access_iterator_impl : ::std::false_type -{ -}; +// Make is_random_access_iterator and is_forward_iterator not to fail with a 'hard' error when it's used in +// SFINAE with a non-iterator type by providing a default value. +template +auto +__is_iterator_of(int) -> decltype( + ::std::conjunction<::std::is_base_of< + _IteratorTag, typename ::std::iterator_traits<::std::decay_t<_IteratorTypes>>::iterator_category>...>{}); -template -struct __is_random_access_iterator_impl<_IteratorType, - ::std::void_t::iterator_category>> - : ::std::is_same::iterator_category, ::std::random_access_iterator_tag> -{ -}; +template +auto +__is_iterator_of(...) -> ::std::false_type; -/* iterator */ -template -struct __is_random_access_iterator - : ::std::conditional_t<__is_random_access_iterator_impl<_IteratorType>::value, - __is_random_access_iterator<_OtherIteratorTypes...>, ::std::false_type> +template +struct __is_random_access_iterator : decltype(__is_iterator_of<::std::random_access_iterator_tag, _IteratorTypes...>(0)) { }; -template -struct __is_random_access_iterator<_IteratorType> : __is_random_access_iterator_impl<_IteratorType> +template +struct __is_forward_iterator : decltype(__is_iterator_of<::std::forward_iterator_tag, _IteratorTypes...>(0)) { }; template -using __is_random_access_iterator_t = typename __is_random_access_iterator<_IteratorTypes...>::type; +inline constexpr bool __is_random_access_iterator_v = __is_random_access_iterator<_IteratorTypes...>::value; template -inline constexpr bool __is_random_access_iterator_v = __is_random_access_iterator<_IteratorTypes...>::value; +inline constexpr bool __is_forward_iterator_v = __is_forward_iterator<_IteratorTypes...>::value; } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/numeric_fwd.h b/include/oneapi/dpl/pstl/numeric_fwd.h index c663cd5c2d4..1c835443e78 100644 --- a/include/oneapi/dpl/pstl/numeric_fwd.h +++ b/include/oneapi/dpl/pstl/numeric_fwd.h @@ -25,6 +25,8 @@ namespace dpl { namespace __internal { +template +struct __parallel_tag; //------------------------------------------------------------------------ // transform_reduce (version with two binary functions, according to draft N4659) @@ -41,19 +43,17 @@ _Tp __brick_transform_reduce(_ForwardIterator1, _ForwardIterator1, _ForwardItera _BinaryOperation2, /*__is_vector=*/::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, - _BinaryOperation1, _BinaryOperation2, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, + _BinaryOperation1, _BinaryOperation2) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _Tp, _BinaryOperation1, _BinaryOperation2, _IsVector __is_vector, - /*is_parallel=*/::std::true_type); +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _Tp, _BinaryOperation1, _BinaryOperation2); //------------------------------------------------------------------------ // transform_reduce (version with unary and binary functions) @@ -67,19 +67,29 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Tp, _BinaryOperation, - _UnaryOperation, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Tp, _BinaryOperation, - _UnaryOperation, _IsVector, - /*is_parallel=*/::std::true_type); +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, + _BinaryOperation1, _BinaryOperation2 __bnary_op2) noexcept; + +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _Tp, _BinaryOperation1, _BinaryOperation2); + +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Tp, _BinaryOperation, + _UnaryOperation) noexcept; + +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Tp, _BinaryOperation, _UnaryOperation); //------------------------------------------------------------------------ // transform_exclusive_scan @@ -97,36 +107,30 @@ ::std::pair<_OutputIterator, _Tp> __brick_transform_scan(_RandomAccessIterator, _UnaryOperation, _Tp, _BinaryOperation, /*Inclusive*/ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryOperation, _Tp, - _BinaryOperation, _Inclusive, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _UnaryOperation, _Tp, _BinaryOperation, _Inclusive, _IsVector, - /*is_parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, - ::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _UnaryOperation, _Tp, _BinaryOperation, _Inclusive, _IsVector, - /*is_parallel=*/::std::true_type); +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryOperation, + _Tp, _BinaryOperation, _Inclusive) noexcept; + +template +::std::enable_if_t, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive); + +template +::std::enable_if_t<::std::is_floating_point_v<_Tp>, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive); // transform_scan without initial element -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive, - _IsVector __is_vector, _IsParallel __is_parallel); +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator, _ForwardIterator, _OutputIterator, + _UnaryOperation, _BinaryOperation, _Inclusive); //------------------------------------------------------------------------ // adjacent_difference @@ -141,17 +145,16 @@ _OutputIterator __brick_adjacent_difference(_RandomAccessIterator, _RandomAccess _BinaryOperation, /*is_vector*/ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryOperation, - _IsVector, /*is_parallel*/ ::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_adjacent_difference(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _BinaryOperation, _IsVector, /*is_parallel*/ ::std::true_type); +template +_OutputIterator +__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _BinaryOperation) noexcept; + +template +_RandomAccessIterator2 +__pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _BinaryOperation); } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 8e65580918c..c479060dd09 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -61,36 +61,40 @@ __brick_transform_reduce(_RandomAccessIterator1 __first1, _RandomAccessIterator1 [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, - _BinaryOperation2 __binary_op2, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept + _BinaryOperation2 __binary_op2) noexcept { - return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, - _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, + _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable { return __binary_op2(*__i, *(__first2 + (__i - __first1))); }, __init, __binary_op1, // Combine - [__first1, __first2, __binary_op1, __binary_op2, - __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, _Tp __init) -> _Tp { + [__first1, __first2, __binary_op1, __binary_op2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, + _Tp __init) -> _Tp { return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, __binary_op1, - __binary_op2, __is_vector); + __binary_op2, _IsVector{}); }); }); } @@ -123,29 +127,33 @@ __brick_transform_reduce(_RandomAccessIterator __first, _RandomAccessIterator __ [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, - _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, + _BinaryOperation __binary_op, _UnaryOperation __unary_op) noexcept { - return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__unary_op](_RandomAccessIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, - [__unary_op, __binary_op, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) { - return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, __is_vector); + [__unary_op, __binary_op](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) { + return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, _IsVector{}); }); }); } @@ -228,31 +236,34 @@ __brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __la /*is_vector=*/::std::false_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, - _Inclusive, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept + _Inclusive) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), - __is_vector) + typename _Tag::__is_vector{}) .first; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, - _Inclusive, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +::std::enable_if_t, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation __binary_op, _Inclusive) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; return __internal::__except_handler([&]() { __par_backend::__parallel_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), __last - __first, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __last - __first, [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, __binary_op, [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) { @@ -261,24 +272,24 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs __unary_op, /*__is_vector*/ ::std::false_type()); }, - [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __j, - _Tp __init) { + [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __j, _Tp __init) { return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, - __init, __binary_op, _Inclusive(), __is_vector) + __init, __binary_op, _Inclusive(), _IsVector{}) .second; }); return __result + (__last - __first); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, - ::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, - _Inclusive, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +::std::enable_if_t<::std::is_floating_point_v<_Tp>, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation __binary_op, _Inclusive) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; _DifferenceType __n = __last - __first; @@ -286,12 +297,13 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs { return __result; } + return __internal::__except_handler([&]() { __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n, __init, - [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __init, + [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __len) { return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, - __unary_op, _Tp{}, __binary_op, _Inclusive(), __is_vector) + __unary_op, _Tp{}, __binary_op, _Inclusive(), _IsVector{}) .second; }, __binary_op, @@ -309,20 +321,22 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs } // transform_scan without initial element -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive, - _IsVector __is_vector, _IsParallel __is_parallel) +template +_OutputIterator +__pattern_transform_scan(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __result, _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive) { + static_assert(__is_host_dispatch_tag_v<_Tag>); + typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; if (__first != __last) { _ValueType __tmp = __unary_op(*__first); *__result = __tmp; - return __pattern_transform_scan(::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, ++__result, - __unary_op, __tmp, __binary_op, _Inclusive(), __is_vector, __is_parallel); + + return __pattern_transform_scan(__tag, ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, ++__result, + __unary_op, __tmp, __binary_op, _Inclusive()); } else { @@ -360,38 +374,42 @@ __brick_adjacent_difference(_RandomAccessIterator1 __first, _RandomAccessIterato [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator __d_first, _BinaryOperation __op, _IsVector __is_vector, - /*is_parallel*/ ::std::false_type) noexcept +template +_OutputIterator +__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __d_first, _BinaryOperation __op) noexcept { - return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_adjacent_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, _BinaryOperation __op) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + assert(__first != __last); typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; *__d_first = *__first; - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, - [&__op, __is_vector, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { - _RandomAccessIterator2 __d_b = __d_first + (__b - __first); - __internal::__brick_walk3( - __b, __e, __b + 1, __d_b + 1, - [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, - __is_vector); - }); - return __d_first + (__last - __first); + + return __internal::__except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, + [&__op, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { + _RandomAccessIterator2 __d_b = __d_first + (__b - __first); + __internal::__brick_walk3( + __b, __e, __b + 1, __d_b + 1, + [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, + _IsVector{}); + }); + return __d_first + (__last - __first); + }); } } // namespace __internal diff --git a/include/oneapi/dpl/pstl/omp/parallel_for.h b/include/oneapi/dpl/pstl/omp/parallel_for.h index 5b6ed66453a..1a0ea24d798 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_for.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for.h @@ -49,7 +49,7 @@ __parallel_for_body(_Index __first, _Index __last, _Fp __f) template void -__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) { if (omp_in_parallel()) { diff --git a/include/oneapi/dpl/pstl/omp/parallel_for_each.h b/include/oneapi/dpl/pstl/omp/parallel_for_each.h index 7877ef095ef..32410cbe927 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_for_each.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for_each.h @@ -44,7 +44,8 @@ __parallel_for_each_body(_ForwardIterator __first, _ForwardIterator __last, _Fp template void -__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Fp __f) +__parallel_for_each(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _ForwardIterator __first, + _ForwardIterator __last, _Fp __f) { if (omp_in_parallel()) { diff --git a/include/oneapi/dpl/pstl/omp/parallel_invoke.h b/include/oneapi/dpl/pstl/omp/parallel_invoke.h index 32491ab9dfd..3503096add5 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_invoke.h +++ b/include/oneapi/dpl/pstl/omp/parallel_invoke.h @@ -38,7 +38,7 @@ __parallel_invoke_body(_F1&& __f1, _F2&& __f2) template void -__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +__parallel_invoke(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) { if (omp_in_parallel()) { diff --git a/include/oneapi/dpl/pstl/omp/parallel_merge.h b/include/oneapi/dpl/pstl/omp/parallel_merge.h index 911d4b2643b..162ef097801 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_merge.h +++ b/include/oneapi/dpl/pstl/omp/parallel_merge.h @@ -71,10 +71,9 @@ __parallel_merge_body(std::size_t __size_x, std::size_t __size_y, _RandomAccessI template void -__parallel_merge(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, - _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, - _LeafMerge __leaf_merge) - +__parallel_merge(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) { std::size_t __size_x = __xe - __xs; std::size_t __size_y = __ye - __ys; diff --git a/include/oneapi/dpl/pstl/omp/parallel_reduce.h b/include/oneapi/dpl/pstl/omp/parallel_reduce.h index beefe09b738..4fc62cdf3d8 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_reduce.h +++ b/include/oneapi/dpl/pstl/omp/parallel_reduce.h @@ -52,8 +52,8 @@ __parallel_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __la template _Value -__parallel_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity, - _RealBody __real_body, _Reduction __reduction) +__parallel_reduce(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Value __identity, _RealBody __real_body, _Reduction __reduction) { // We don't create a nested parallel region in an existing parallel region: // just create tasks. diff --git a/include/oneapi/dpl/pstl/omp/parallel_scan.h b/include/oneapi/dpl/pstl/omp/parallel_scan.h index 29c6c77be54..c3bc022cb2e 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_scan.h +++ b/include/oneapi/dpl/pstl/omp/parallel_scan.h @@ -82,13 +82,14 @@ __downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsi template void -__parallel_strict_scan_body(_Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) +__parallel_strict_scan_body(_ExecutionPolicy&& __exec, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, + _Sp __scan, _Ap __apex) { _Index __p = omp_get_num_threads(); const _Index __slack = 4; _Index __tilesize = (__n - 1) / (__slack * __p) + 1; _Index __m = (__n - 1) / __tilesize; - __buffer<_ExecutionPolicy, _Tp> __buf(__m + 1); + __buffer<_ExecutionPolicy, _Tp> __buf(::std::forward<_ExecutionPolicy>(__exec), __m + 1); _Tp* __r = __buf.get(); oneapi::dpl::__omp_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, @@ -108,8 +109,8 @@ __parallel_strict_scan_body(_Index __n, _Tp __initial, _Rp __reduce, _Cp __combi template void -__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, - _Ap __apex) +__parallel_strict_scan(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& __exec, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) { if (__n <= __default_chunk_size) { @@ -128,16 +129,16 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu if (omp_in_parallel()) { - oneapi::dpl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine, - __scan, __apex); + oneapi::dpl::__omp_backend::__parallel_strict_scan_body(::std::forward<_ExecutionPolicy>(__exec), __n, + __initial, __reduce, __combine, __scan, __apex); } else { _PSTL_PRAGMA(omp parallel) _PSTL_PRAGMA(omp single nowait) { - oneapi::dpl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, - __combine, __scan, __apex); + oneapi::dpl::__omp_backend::__parallel_strict_scan_body(::std::forward<_ExecutionPolicy>(__exec), __n, + __initial, __reduce, __combine, __scan, __apex); } } } diff --git a/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h b/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h index 14aa7b7bf04..4633a3fcade 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h +++ b/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h @@ -123,8 +123,9 @@ __parallel_stable_sort_body(_RandomAccessIterator __xs, _RandomAccessIterator __ template void -__parallel_stable_sort(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator __xs, _RandomAccessIterator __xe, - _Compare __comp, _LeafSort __leaf_sort, std::size_t __nsort = 0) +__parallel_stable_sort(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& /*__exec*/, + _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, + std::size_t __nsort = 0) { auto __count = static_cast(__xe - __xs); if (__count <= __default_chunk_size || __nsort < __count) diff --git a/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h b/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h index d94e5fd36e9..2c6cf06577b 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h +++ b/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h @@ -86,8 +86,9 @@ __transform_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __l template _Value -__parallel_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryOp __unary_op, _Value __init, _Combiner __combiner, _Reduction __reduction) +__parallel_transform_reduce(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, + _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryOp __unary_op, + _Value __init, _Combiner __combiner, _Reduction __reduction) { _Value __result = __init; if (omp_in_parallel()) diff --git a/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h b/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h index 98262635d1e..35c28b4330c 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h +++ b/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h @@ -27,8 +27,8 @@ namespace __omp_backend template _Tp -__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up /* __u */, _Tp __init, _Cp /* __combine */, - _Rp /* __brick_reduce */, _Sp __scan) +__parallel_transform_scan(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __n, _Up /* __u */, + _Tp __init, _Cp /* __combine */, _Rp /* __brick_reduce */, _Sp __scan) { // TODO: parallelize this function. return __scan(_Index(0), __n, __init); diff --git a/include/oneapi/dpl/pstl/omp/util.h b/include/oneapi/dpl/pstl/omp/util.h index bcbfecc23e4..e7c4e3cbc40 100644 --- a/include/oneapi/dpl/pstl/omp/util.h +++ b/include/oneapi/dpl/pstl/omp/util.h @@ -48,7 +48,7 @@ namespace __omp_backend // use to cancel execution //------------------------------------------------------------------------ inline void -__cancel_execution() +__cancel_execution(oneapi::dpl::__internal::__omp_backend_tag) { // TODO: Figure out how to make cancellation work. } @@ -68,9 +68,10 @@ class __buffer_impl operator=(const __buffer_impl&) = delete; public: - static_assert(::std::is_same_v<_ExecutionPolicy, ::std::decay_t<_ExecutionPolicy>>); - - __buffer_impl(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {} + __buffer_impl(_ExecutionPolicy /*__exec*/, std::size_t __n) + : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) + { + } operator bool() const { return __ptr_ != nullptr; } diff --git a/include/oneapi/dpl/pstl/parallel_backend.h b/include/oneapi/dpl/pstl/parallel_backend.h index 1e78d1f635b..b243e8fb492 100644 --- a/include/oneapi/dpl/pstl/parallel_backend.h +++ b/include/oneapi/dpl/pstl/parallel_backend.h @@ -18,14 +18,14 @@ // Select a parallel backend #if ONEDPL_USE_TBB_BACKEND || (!defined(ONEDPL_USE_TBB_BACKEND) && !ONEDPL_USE_OPENMP_BACKEND && _ONEDPL_TBB_AVAILABLE) -# include "parallel_backend_tbb.h" # define _ONEDPL_PAR_BACKEND_TBB 1 +# include "parallel_backend_tbb.h" #elif ONEDPL_USE_OPENMP_BACKEND || (!defined(ONEDPL_USE_OPENMP_BACKEND) && _ONEDPL_OPENMP_AVAILABLE) -# include "parallel_backend_omp.h" # define _ONEDPL_PAR_BACKEND_OPENMP 1 +# include "parallel_backend_omp.h" #else -# include "parallel_backend_serial.h" # define _ONEDPL_PAR_BACKEND_SERIAL 1 +# include "parallel_backend_serial.h" #endif #if _ONEDPL_BACKEND_SYCL diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index a2dd6468a34..edd6652d359 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -43,9 +43,10 @@ class __buffer_impl operator=(const __buffer_impl&) = delete; public: - static_assert(::std::is_same_v<_ExecutionPolicy, ::std::decay_t<_ExecutionPolicy>>); - - __buffer_impl(::std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {} + __buffer_impl(_ExecutionPolicy /*__exec*/, ::std::size_t __n) + : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) + { + } operator bool() const { return __ptr_ != nullptr; } _Tp* @@ -60,21 +61,22 @@ template using __buffer = __buffer_impl<::std::decay_t<_ExecutionPolicy>, _Tp>; inline void -__cancel_execution() +__cancel_execution(oneapi::dpl::__internal::__serial_backend_tag) { } template void -__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + _Fp __f) { __f(__first, __last); } template _Value -__parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Value& __identity, - const _RealBody& __real_body, const _Reduction&) +__parallel_reduce(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + const _Value& __identity, const _RealBody& __real_body, const _Reduction&) { if (__first == __last) { @@ -88,16 +90,16 @@ __parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Valu template _Tp -__parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, - _Reduce __reduce) +__parallel_transform_reduce(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, + _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) { return __reduce(__first, __last, __init); } template void -__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, - _Ap __apex) +__parallel_strict_scan(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) { _Tp __sum = __initial; if (__n) @@ -109,15 +111,16 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu template _Tp -__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _UnaryOp, _Tp __init, _BinaryOp, _Reduce, _Scan __scan) +__parallel_transform_scan(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _UnaryOp, + _Tp __init, _BinaryOp, _Reduce, _Scan __scan) { return __scan(_Index(0), __n, __init); } template void -__parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _LeafSort __leaf_sort, ::std::size_t = 0) +__parallel_stable_sort(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort, ::std::size_t = 0) { __leaf_sort(__first, __last, __comp); } @@ -125,16 +128,16 @@ __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _Rando template void -__parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __outit, - _Compare __comp, _LeafMerge __leaf_merge) +__parallel_merge(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __outit, _Compare __comp, _LeafMerge __leaf_merge) { __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp); } template void -__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +__parallel_invoke(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) { ::std::forward<_F1>(__f1)(); ::std::forward<_F2>(__f2)(); @@ -142,7 +145,8 @@ __parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) template void -__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) +__parallel_for_each(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, + _ForwardIterator __end, _Fp __f) { for (auto __iter = __begin; __iter != __end; ++__iter) __f(*__iter); diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 556e305e1c7..2ddfa61007a 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -23,6 +23,7 @@ #include #include "parallel_backend_utils.h" +#include "execution_impl.h" // Bring in minimal required subset of Intel(R) Threading Building Blocks (Intel(R) TBB) #include @@ -64,10 +65,11 @@ class __buffer_impl operator=(const __buffer_impl&) = delete; public: - static_assert(::std::is_same_v<_ExecutionPolicy, ::std::decay_t<_ExecutionPolicy>>); - //! Try to obtain buffer of given size to store objects of _Tp type - __buffer_impl(const ::std::size_t __n) : _M_allocator(), _M_ptr(_M_allocator.allocate(__n)), _M_buf_size(__n) {} + __buffer_impl(_ExecutionPolicy /*__exec*/, const ::std::size_t __n) + : _M_allocator(), _M_ptr(_M_allocator.allocate(__n)), _M_buf_size(__n) + { + } //! True if buffer was successfully obtained, zero otherwise. operator bool() const { return _M_ptr != nullptr; } //! Return pointer to buffer, or nullptr if buffer could not be obtained. @@ -85,7 +87,7 @@ using __buffer = __buffer_impl<::std::decay_t<_ExecutionPolicy>, _Tp>; // Wrapper for tbb::task inline void -__cancel_execution() +__cancel_execution(oneapi::dpl::__internal::__tbb_backend_tag) { #if TBB_INTERFACE_VERSION <= 12000 tbb::task::self().group()->cancel_group_execution(); @@ -118,7 +120,7 @@ class __parallel_for_body // wrapper over tbb::parallel_for template void -__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) { tbb::this_task_arena::isolate([=]() { tbb::parallel_for(tbb::blocked_range<_Index>(__first, __last), __parallel_for_body<_Index, _Fp>(__f)); @@ -129,8 +131,8 @@ __parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) // wrapper over tbb::parallel_reduce template _Value -__parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Value& __identity, - const _RealBody& __real_body, const _Reduction& __reduction) +__parallel_reduce(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + const _Value& __identity, const _RealBody& __real_body, const _Reduction& __reduction) { return tbb::this_task_arena::isolate([__first, __last, &__identity, &__real_body, &__reduction]() -> _Value { return tbb::parallel_reduce( @@ -210,8 +212,8 @@ struct __par_trans_red_body template _Tp -__parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _Up __u, _Tp __init, _Cp __combine, - _Rp __brick_reduce) +__parallel_transform_reduce(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, + _Index __last, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce) { __tbb_backend::__par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce); // The grain size of 3 is used in order to provide minimum 2 elements for each body @@ -379,8 +381,8 @@ __downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsi // T must have a trivial constructor and destructor. template void -__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, - _Ap __apex) +__parallel_strict_scan(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&& __exec, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) { tbb::this_task_arena::isolate([=, &__combine]() { if (__n > 1) @@ -389,7 +391,7 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu const _Index __slack = 4; _Index __tilesize = (__n - 1) / (__slack * __p) + 1; _Index __m = (__n - 1) / __tilesize; - __tbb_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__m + 1); + __tbb_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__exec, __m + 1); _Tp* __r = __buf.get(); __tbb_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, __combine); @@ -419,8 +421,8 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu template _Tp -__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce, - _Sp __scan) +__parallel_transform_scan(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __n, _Up __u, + _Tp __init, _Cp __combine, _Rp __brick_reduce, _Sp __scan) { __trans_scan_body<_Index, _Up, _Tp, _Cp, _Rp, _Sp> __body(__u, __init, __combine, __brick_reduce, __scan); auto __range = tbb::blocked_range<_Index>(0, __n); @@ -1182,8 +1184,9 @@ __stable_sort_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _Le template void -__parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, - _LeafSort __leaf_sort, ::std::size_t __nsort) +__parallel_stable_sort(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&& __exec, + _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, + ::std::size_t __nsort) { tbb::this_task_arena::isolate([=, &__nsort]() { //sorting based on task tree and parallel merge @@ -1194,7 +1197,7 @@ __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAc const _DifferenceType __sort_cut_off = _ONEDPL_STABLE_SORT_CUT_OFF; if (__n > __sort_cut_off) { - __tbb_backend::__buffer<_ExecutionPolicy, _ValueType> __buf(__n); + __tbb_backend::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __n); __root_task<__stable_sort_func<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>> __root{ __xs, __xe, __buf.get(), true, __comp, __leaf_sort, __nsort, __xs, __buf.get()}; __task::spawn_root_and_wait(__root); @@ -1274,9 +1277,9 @@ operator()(__task* __self) template void -__parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, - _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, - _LeafMerge __leaf_merge) +__parallel_merge(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) { typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; @@ -1303,9 +1306,10 @@ __parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __xs, _RandomAccessI //------------------------------------------------------------------------ // parallel_invoke //------------------------------------------------------------------------ + template void -__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +__parallel_invoke(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) { //TODO: a version of tbb::this_task_arena::isolate with variadic arguments pack should be added in the future tbb::this_task_arena::isolate( @@ -1315,9 +1319,11 @@ __parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) //------------------------------------------------------------------------ // parallel_for_each //------------------------------------------------------------------------ + template void -__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) +__parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, + _ForwardIterator __end, _Fp __f) { tbb::this_task_arena::isolate([&]() { tbb::parallel_for_each(__begin, __end, __f); }); } diff --git a/include/oneapi/dpl/pstl/parallel_impl.h b/include/oneapi/dpl/pstl/parallel_impl.h index a2d7d20e562..66d9d8d1741 100644 --- a/include/oneapi/dpl/pstl/parallel_impl.h +++ b/include/oneapi/dpl/pstl/parallel_impl.h @@ -32,10 +32,13 @@ namespace __internal //----------------------------------------------------------------------- /** Return extremum value returned by brick f[i,j) for subranges [i,j) of [first,last) Each f[i,j) must return a value in [i,j). */ -template +template _Index -__parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) +__parallel_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, + _IsFirst) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_Index>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; _DifferenceType __initial_dist = _IsFirst::value ? __n : -1; @@ -44,7 +47,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick ::std::atomic<_DifferenceType> __extremum(__initial_dist); // TODO: find out what is better here: parallel_for or parallel_reduce - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of // why using a shared variable scales fairly well in this situation. @@ -70,17 +73,19 @@ __parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick // parallel_or //------------------------------------------------------------------------ //! Return true if brick f[i,j) returns true for some subrange [i,j) of [first,last) -template +template bool -__parallel_or(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) +__parallel_or(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + ::std::atomic __found(false); - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__f, &__found](_Index __i, _Index __j) { if (!__found.load(::std::memory_order_relaxed) && __f(__i, __j)) { __found.store(true, ::std::memory_order_relaxed); - __par_backend::__cancel_execution(); + __par_backend::__cancel_execution(__backend_tag{}); } }); return __found; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 91adf973d02..8893497fc99 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -12,7 +12,10 @@ # ##===----------------------------------------------------------------------===## add_subdirectory(kt) -add_subdirectory(distributed-ranges/shp) + +if (ONEDPL_USE_DR) + add_subdirectory(distributed-ranges/shp) +endif() # rng_tests set (ranlux_24_48_test.pass_timeout_debug "900") # 15min set (ranlux_24_48_test.pass_timeout_release "720") # 12min @@ -66,10 +69,16 @@ endif() add_custom_target(build-onedpl-tests COMMENT "Build all oneDPL tests") +if (ONEDPL_USE_DR) + set(run-onedpl-tests-depends build-onedpl-tests shp-tests shp-tests-3) +else() + set(run-onedpl-tests-depends build-onedpl-tests) +endif() + add_custom_target(run-onedpl-tests COMMAND "${CMAKE_CTEST_COMMAND}" --output-on-failure USES_TERMINAL - DEPENDS build-onedpl-tests + DEPENDS ${run-onedpl-tests-depends} COMMENT "Build and run all oneDPL tests") macro(onedpl_construct_exec test_source_file _test_name switch_off_checked_iterators custom_define) @@ -249,3 +258,8 @@ if (TARGET interop_allocs.pass) target_sources(interop_allocs.pass PRIVATE "${CMAKE_CURRENT_LIST_DIR}/pstl_offload/memory/interop_allocs_system.cpp" "${CMAKE_CURRENT_LIST_DIR}/pstl_offload/memory/interop_allocs_usm.cpp") endif() + +if (ONEDPL_USE_DR) + add_custom_target(build-dr-tests COMMENT "Build dr tests" DEPENDS shp-tests shp-tests-3) + add_custom_target(run-dr-tests COMMENT "Run dr tests" DEPENDS build-dr-tests COMMAND ./distributed-ranges/shp/shp-tests ./distributed-ranges/shp/shp-tests-3) +endif() \ No newline at end of file diff --git a/test/distributed-ranges/common/counted.cpp b/test/distributed-ranges/common/counted.cpp index a471ddda762..c7e7ef6355e 100644 --- a/test/distributed-ranges/common/counted.cpp +++ b/test/distributed-ranges/common/counted.cpp @@ -72,44 +72,44 @@ TYPED_TEST(Counted, countedOfOneElementHasOneSegmentAndSameRank) { TypeParam dv(10, 77); auto counted_view_result = xhp::views::counted(dv.end() - 1, 1); - auto counted_view_segments = dr::ranges::segments(counted_view_result); - auto dv_segments = dr::ranges::segments(dv); + auto counted_view_segments = experimental::dr::ranges::segments(counted_view_result); + auto dv_segments = experimental::dr::ranges::segments(dv); auto last_segment_index = dv_segments.size() - 1; EXPECT_TRUE(check_segments(counted_view_result)); EXPECT_EQ(rng::size(counted_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(counted_view_segments[0]), - dr::ranges::rank(dv_segments[last_segment_index])); + EXPECT_EQ(experimental::dr::ranges::rank(counted_view_segments[0]), + experimental::dr::ranges::rank(dv_segments[last_segment_index])); } TYPED_TEST(Counted, countedOfFirstSegementHasOneSegmentAndSameRank) { TypeParam dv(123456, 77); - const auto first_seg_size = dr::ranges::segments(dv)[0].size(); + const auto first_seg_size = experimental::dr::ranges::segments(dv)[0].size(); std::size_t bias = 2; // test assumes there are not too many ranks - assert(dr::ranges::segments(dv)[0].size() > bias); + assert(experimental::dr::ranges::segments(dv)[0].size() > bias); auto counted_view_result = xhp::views::counted(dv.begin() + bias, first_seg_size - bias); - auto counted_view_segments = dr::ranges::segments(counted_view_result); + auto counted_view_segments = experimental::dr::ranges::segments(counted_view_result); EXPECT_EQ(rng::size(counted_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(counted_view_segments[0]), - dr::ranges::rank(dr::ranges::segments(dv)[0])); + EXPECT_EQ(experimental::dr::ranges::rank(counted_view_segments[0]), + experimental::dr::ranges::rank(experimental::dr::ranges::segments(dv)[0])); } TYPED_TEST(Counted, countedOfAllButOneSizeHasAllSegmentsWithSameRanks) { TypeParam dv(EVENLY_DIVIDABLE_SIZE, 77); - auto dv_segments = dr::ranges::segments(dv); + auto dv_segments = experimental::dr::ranges::segments(dv); std::size_t bias = 1; // test assumes there are not too many ranks assert(dv_segments[0].size() > bias); auto counted_view_result = xhp::views::counted(dv.begin() + bias, EVENLY_DIVIDABLE_SIZE - bias); - auto counted_view_segments = dr::ranges::segments(counted_view_result); + auto counted_view_segments = experimental::dr::ranges::segments(counted_view_result); EXPECT_EQ(rng::size(dv_segments), rng::size(counted_view_segments)); for (std::size_t i = 0; i < rng::size(dv_segments); ++i) - EXPECT_EQ(dr::ranges::rank(dv_segments[i]), - dr::ranges::rank(counted_view_segments[i])); + EXPECT_EQ(experimental::dr::ranges::rank(dv_segments[i]), + experimental::dr::ranges::rank(counted_view_segments[i])); } diff --git a/test/distributed-ranges/common/distributed_vector.cpp b/test/distributed-ranges/common/distributed_vector.cpp index 721f7c49419..b78baf7e2fe 100644 --- a/test/distributed-ranges/common/distributed_vector.cpp +++ b/test/distributed-ranges/common/distributed_vector.cpp @@ -12,38 +12,30 @@ template class DistributedVectorAllTypes : public testing::Test { TYPED_TEST_SUITE(DistributedVectorAllTypes, AllTypes); TYPED_TEST(DistributedVectorAllTypes, StaticAsserts) { - DRLOG("Running StaticAsserts test"); TypeParam dv(10); static_assert(rng::random_access_range); static_assert(rng::random_access_range); static_assert(rng::viewable_range); static_assert(std::forward_iterator); - static_assert(dr::distributed_iterator); + static_assert(experimental::dr::distributed_iterator); static_assert(rng::forward_range); static_assert(rng::random_access_range); - static_assert(dr::distributed_contiguous_range); + static_assert(experimental::dr::distributed_contiguous_range); } TYPED_TEST(DistributedVectorAllTypes, getAndPut) { - DRLOG("Running getAndPut test"); TypeParam dv(10); if (comm_rank == 0) { - DRLOG("DV constructed, assign sth on root rank"); dv[5] = 13; - DRLOG("13 assigned on root, now calling fence"); } else { - DRLOG("DV constructed, we are on non-root rank so just call fence"); } fence_on(dv); - DRLOG("barrier called now reading"); for (std::size_t idx = 0; idx < 10; ++idx) { - DRLOG("reading idx:{}", idx); auto val = dv[idx]; - DRLOG("read idx:{} finished, got:{}", idx, val); if (idx == 5) { EXPECT_EQ(val, 13); } else { @@ -53,7 +45,6 @@ TYPED_TEST(DistributedVectorAllTypes, getAndPut) { } TYPED_TEST(DistributedVectorAllTypes, Stream) { - DRLOG("Running Stream test"); Ops1 ops(10); std::ostringstream os; os << ops.dist_vec; @@ -61,7 +52,6 @@ TYPED_TEST(DistributedVectorAllTypes, Stream) { } TYPED_TEST(DistributedVectorAllTypes, Equality) { - DRLOG("Running Equality test"); Ops1 ops(10); iota(ops.dist_vec, 100); rng::iota(ops.vec, 100); @@ -70,7 +60,6 @@ TYPED_TEST(DistributedVectorAllTypes, Equality) { } TYPED_TEST(DistributedVectorAllTypes, Segments) { - DRLOG("Running Segments test"); Ops1 ops(10); EXPECT_TRUE(check_segments(ops.dist_vec)); @@ -79,7 +68,6 @@ TYPED_TEST(DistributedVectorAllTypes, Segments) { } TEST(DistributedVector, ConstructorBasic) { - DRLOG("Running ConstructorBasic test"); xhp::distributed_vector dist_vec(10); iota(dist_vec, 100); @@ -90,7 +78,6 @@ TEST(DistributedVector, ConstructorBasic) { } TEST(DistributedVector, ConstructorFill) { - DRLOG("Running ConstructorFill test"); xhp::distributed_vector dist_vec(10, 1); std::vector local_vec(10, 1); diff --git a/test/distributed-ranges/common/drop.cpp b/test/distributed-ranges/common/drop.cpp index ea13b600214..7fb271d0db3 100644 --- a/test/distributed-ranges/common/drop.cpp +++ b/test/distributed-ranges/common/drop.cpp @@ -78,48 +78,48 @@ TYPED_TEST(Drop, largeDropOfAllButOneHasSameSegmentAndRank) { auto drop_view_result = xhp::views::drop(dv, 123456 - 1); - auto drop_view_segments = dr::ranges::segments(drop_view_result); - auto dv_segments = dr::ranges::segments(dv); + auto drop_view_segments = experimental::dr::ranges::segments(drop_view_result); + auto dv_segments = experimental::dr::ranges::segments(dv); auto last_segment_index = dv_segments.size() - 1; EXPECT_TRUE(check_segments(drop_view_result)); EXPECT_EQ(rng::size(drop_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(drop_view_segments[0]), - dr::ranges::rank(dv_segments[last_segment_index])); + EXPECT_EQ(experimental::dr::ranges::rank(drop_view_segments[0]), + experimental::dr::ranges::rank(dv_segments[last_segment_index])); } TYPED_TEST(Drop, dropOfAllElementsButOneHasOneSegmentAndSameRank) { TypeParam dv(10, 77); auto drop_view_result = xhp::views::drop(dv, 9); - auto drop_view_segments = dr::ranges::segments(drop_view_result); - auto dv_segments = dr::ranges::segments(dv); + auto drop_view_segments = experimental::dr::ranges::segments(drop_view_result); + auto dv_segments = experimental::dr::ranges::segments(dv); auto last_segment_index = dv_segments.size() - 1; EXPECT_TRUE(check_segments(drop_view_result)); EXPECT_EQ(rng::size(drop_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(drop_view_segments[0]), - dr::ranges::rank(dv_segments[last_segment_index])); + EXPECT_EQ(experimental::dr::ranges::rank(drop_view_segments[0]), + experimental::dr::ranges::rank(dv_segments[last_segment_index])); } TYPED_TEST(Drop, dropOfFirstSegementHasSameSegmentsSize) { TypeParam dv(10, 77); - const auto first_seg_size = dr::ranges::segments(dv)[0].size(); + const auto first_seg_size = experimental::dr::ranges::segments(dv)[0].size(); auto drop_view_result = xhp::views::drop(dv, first_seg_size); - auto drop_view_segments = dr::ranges::segments(drop_view_result); - EXPECT_EQ(rng::size(drop_view_segments), dr::ranges::segments(dv).size() - 1); + auto drop_view_segments = experimental::dr::ranges::segments(drop_view_result); + EXPECT_EQ(rng::size(drop_view_segments), experimental::dr::ranges::segments(dv).size() - 1); } TYPED_TEST(Drop, dropOfOneElementHasAllSegmentsWithSameRanks) { TypeParam dv(EVENLY_DIVIDABLE_SIZE, 77); - auto dv_segments = dr::ranges::segments(dv); + auto dv_segments = experimental::dr::ranges::segments(dv); auto drop_view_result = xhp::views::drop(dv, 1); - auto drop_view_segments = dr::ranges::segments(drop_view_result); + auto drop_view_segments = experimental::dr::ranges::segments(drop_view_result); EXPECT_EQ(rng::size(dv_segments), rng::size(drop_view_segments)); for (std::size_t i = 0; i < rng::size(dv_segments); ++i) - EXPECT_EQ(dr::ranges::rank(dv_segments[i]), - dr::ranges::rank(drop_view_segments[i])); + EXPECT_EQ(experimental::dr::ranges::rank(dv_segments[i]), + experimental::dr::ranges::rank(drop_view_segments[i])); } diff --git a/test/distributed-ranges/common/iota_view.cpp b/test/distributed-ranges/common/iota_view.cpp index 39242a101f2..941d30b419d 100644 --- a/test/distributed-ranges/common/iota_view.cpp +++ b/test/distributed-ranges/common/iota_view.cpp @@ -12,7 +12,7 @@ TYPED_TEST_SUITE(IotaView, AllTypes); TYPED_TEST(IotaView, ZipWithDR) { xhp::distributed_vector dv(10); - auto v = dr::views::iota(1, 10); + auto v = experimental::dr::views::iota(1, 10); auto z = xhp::views::zip(dv, v); @@ -26,7 +26,7 @@ TYPED_TEST(IotaView, ZipWithDR) { TYPED_TEST(IotaView, Copy) { TypeParam dv(10); - auto v = dr::views::iota(1, 11); + auto v = experimental::dr::views::iota(1, 11); xhp::copy(v, dv.begin()); @@ -36,7 +36,7 @@ TYPED_TEST(IotaView, Copy) { TYPED_TEST(IotaView, Transform) { TypeParam dv(10); - auto v = dr::views::iota(1, 11); + auto v = experimental::dr::views::iota(1, 11); auto negate = [](auto v) { return -v; }; xhp::transform(v, dv.begin(), negate); @@ -47,7 +47,7 @@ TYPED_TEST(IotaView, Transform) { TYPED_TEST(IotaView, ForEach) { TypeParam dv(10); - auto v = dr::views::iota(1, 11); + auto v = experimental::dr::views::iota(1, 11); auto negate = [](auto v) { auto &[in, out] = v; diff --git a/test/distributed-ranges/common/sycl_utils.cpp b/test/distributed-ranges/common/sycl_utils.cpp index 2e4c58bcc81..99d4aab68a1 100644 --- a/test/distributed-ranges/common/sycl_utils.cpp +++ b/test/distributed-ranges/common/sycl_utils.cpp @@ -20,7 +20,7 @@ TEST(SYCLUtils, ParalelFor1D) { auto seta = [a](auto i) { a[i] = i; }; auto setb = [b](auto i) { b[i] = i; }; q.parallel_for(range, seta).wait(); - dr::__detail::parallel_for(q, range, setb).wait(); + experimental::dr::__detail::parallel_for(q, range, setb).wait(); EXPECT_EQ(rng::span(a, size), rng::span(b, size)); } @@ -45,7 +45,7 @@ TEST(SYCLUtils, ParalelFor2D) { auto setb = [mdb](auto index) { mdb(index[0], index[1]) = 22; }; q.parallel_for(range, seta).wait(); - dr::__detail::parallel_for(q, range, setb).wait(); + experimental::dr::__detail::parallel_for(q, range, setb).wait(); EXPECT_EQ(rng::span(a, size), rng::span(b, size)) << fmt::format("a:\n{}b:\n{}", mda, mdb); @@ -68,7 +68,7 @@ TEST(SYCLUtils, ParalelFor3D) { auto setb = [mdb](auto index) { mdb(index[0], index[1], index[2]) = 22; }; q.parallel_for(range, seta).wait(); - dr::__detail::parallel_for(q, range, setb).wait(); + experimental::dr::__detail::parallel_for(q, range, setb).wait(); EXPECT_EQ(rng::span(a, size), rng::span(b, size)) << fmt::format("a:\n{}b:\n{}", mda, mdb); diff --git a/test/distributed-ranges/common/take.cpp b/test/distributed-ranges/common/take.cpp index 114b1f01e36..a4c6ef2b0fc 100644 --- a/test/distributed-ranges/common/take.cpp +++ b/test/distributed-ranges/common/take.cpp @@ -73,38 +73,38 @@ TYPED_TEST(Take, takeOfOneElementHasOneSegmentAndSameRank) { TypeParam dv(10, 77); auto take_view_result = xhp::views::take(dv, 1); - auto take_view_segments = dr::ranges::segments(take_view_result); - auto dv_segments = dr::ranges::segments(dv); + auto take_view_segments = experimental::dr::ranges::segments(take_view_result); + auto dv_segments = experimental::dr::ranges::segments(dv); EXPECT_TRUE(check_segments(take_view_result)); EXPECT_EQ(rng::size(take_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(take_view_segments[0]), - dr::ranges::rank(dv_segments[0])); + EXPECT_EQ(experimental::dr::ranges::rank(take_view_segments[0]), + experimental::dr::ranges::rank(dv_segments[0])); } TYPED_TEST(Take, takeOfFirstSegementHasOneSegmentAndSameRank) { TypeParam dv(10, 77); - const auto first_seg_size = dr::ranges::segments(dv)[0].size(); + const auto first_seg_size = experimental::dr::ranges::segments(dv)[0].size(); auto take_view_result = xhp::views::take(dv, first_seg_size); - auto take_view_segments = dr::ranges::segments(take_view_result); + auto take_view_segments = experimental::dr::ranges::segments(take_view_result); EXPECT_EQ(rng::size(take_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(take_view_segments[0]), - dr::ranges::rank(dr::ranges::segments(dv)[0])); + EXPECT_EQ(experimental::dr::ranges::rank(take_view_segments[0]), + experimental::dr::ranges::rank(experimental::dr::ranges::segments(dv)[0])); } template void takeHasSameSegments(std::size_t dv_size, std::size_t take_size) { TypeParam dv(dv_size, 77); - auto dv_segments = dr::ranges::segments(dv); + auto dv_segments = experimental::dr::ranges::segments(dv); auto take_view_result = xhp::views::take(dv, take_size); - auto take_view_segments = dr::ranges::segments(take_view_result); + auto take_view_segments = experimental::dr::ranges::segments(take_view_result); EXPECT_EQ(rng::size(dv_segments), rng::size(take_view_segments)); for (std::size_t i = 0; i < rng::size(dv_segments); ++i) - EXPECT_EQ(dr::ranges::rank(dv_segments[i]), - dr::ranges::rank(take_view_segments[i])); + EXPECT_EQ(experimental::dr::ranges::rank(dv_segments[i]), + experimental::dr::ranges::rank(take_view_segments[i])); } TYPED_TEST(Take, takeOfAllButOneSizeHasAllSegmentsWithSameRanks) { diff --git a/test/distributed-ranges/common/zip.cpp b/test/distributed-ranges/common/zip.cpp index 8c108c2cc63..d363807da02 100644 --- a/test/distributed-ranges/common/zip.cpp +++ b/test/distributed-ranges/common/zip.cpp @@ -55,7 +55,7 @@ TYPED_TEST(Zip, RangeSegments) { auto local = rng::views::zip(ops.vec); auto dist = test_zip(ops.dist_vec); - auto flat = rng::views::join(dr::ranges::segments(dist)); + auto flat = rng::views::join(experimental::dr::ranges::segments(dist)); EXPECT_TRUE(is_equal(local, flat)); } @@ -66,7 +66,7 @@ TYPED_TEST(Zip, IterSegments) { auto local = rng::views::zip(ops.vec); auto dist = test_zip(ops.dist_vec); - auto flat = rng::views::join(dr::ranges::segments(dist.begin())); + auto flat = rng::views::join(experimental::dr::ranges::segments(dist.begin())); EXPECT_TRUE(is_equal(local, flat)); } #endif @@ -77,7 +77,7 @@ TYPED_TEST(Zip, Drop) { auto local = rng::views::drop(rng::views::zip(ops.vec), 2); auto dist = xhp::views::drop(test_zip(ops.dist_vec), 2); - auto flat = rng::views::join(dr::ranges::segments(dist)); + auto flat = rng::views::join(experimental::dr::ranges::segments(dist)); EXPECT_EQ(local, dist); EXPECT_TRUE(is_equal(local, flat)); } @@ -179,7 +179,7 @@ TYPED_TEST(Zip, IotaStaticAssert) { static_assert(std::forward_iterator); using Dist = decltype(dist); static_assert(rng::forward_range); - static_assert(dr::distributed_range); + static_assert(experimental::dr::distributed_range); } TYPED_TEST(Zip, Iota) { diff --git a/test/distributed-ranges/include/common-tests.hpp b/test/distributed-ranges/include/common-tests.hpp index dda5be91531..4204107fac2 100644 --- a/test/distributed-ranges/include/common-tests.hpp +++ b/test/distributed-ranges/include/common-tests.hpp @@ -6,7 +6,7 @@ constexpr std::size_t EVENLY_DIVIDABLE_SIZE = 2 * 3 * 5 * 7 * 11 * 13; // good up to 16 processes -template +template using LocalVec = std::vector; struct AOS_Struct { @@ -174,7 +174,7 @@ bool contains_empty(auto &&r) { } std::string check_segments_message(auto &&r) { - auto segments = dr::ranges::segments(r); + auto segments = experimental::dr::ranges::segments(r); auto flat = rng::views::join(segments); if (contains_empty(segments) || !is_equal(r, flat)) { return fmt::format("\n" @@ -254,7 +254,7 @@ auto check_binary_check_op(rng::range auto &&a, rng::range auto &&b, } auto check_segments(std::forward_iterator auto di) { - auto segments = dr::ranges::segments(di); + auto segments = experimental::dr::ranges::segments(di); auto flat = rng::join_view(segments); if (contains_empty(segments) || !is_equal(di, flat)) { return testing::AssertionFailure() @@ -295,7 +295,7 @@ concept streamable = requires(std::ostream &os, T value) { { os << value } -> std::convertible_to; }; -namespace dr::mhp { +namespace experimental::dr::mhp { // gtest relies on ADL to find the printer template @@ -325,9 +325,9 @@ bool operator==(const xhp::distributed_vector &dist_vec, return is_equal(local_vec, dist_vec); } -} // namespace dr::mhp +} // namespace experimental::dr::mhp -namespace dr::shp { +namespace experimental::dr::shp { // gtest relies on ADL to find the printer template @@ -357,7 +357,7 @@ bool operator==(const xhp::distributed_vector &dist_vec, return is_equal(dist_vec, local_vec); } -} // namespace dr::shp +} // namespace experimental::dr::shp namespace DR_RANGES_NAMESPACE { diff --git a/test/distributed-ranges/shp/CMakeLists.txt b/test/distributed-ranges/shp/CMakeLists.txt index b635c99d31a..063515a87ec 100644 --- a/test/distributed-ranges/shp/CMakeLists.txt +++ b/test/distributed-ranges/shp/CMakeLists.txt @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: Intel Corporation # # SPDX-License-Identifier: BSD-3-Clause - include(FetchContent) FetchContent_Declare( @@ -40,6 +39,11 @@ target_compile_definitions(dr_shp INTERFACE USE_MKL _GLIBCXX_USE_TBB_PAR_BACKEND=0) target_link_libraries(dr_shp INTERFACE range-v3 fmt::fmt MKL::MKL_DPCPP) +if (DEFINED ONEDPL_USE_DR) + target_compile_options(dr_shp INTERFACE "-DONEDPL_USE_DISTRIBUTED_RANGES") +endif() + + # For use, see: # https://github.com/illuhad/hipSYCL/blob/develop/doc/using-hipsycl.md#using-the-cmake-integration # example: cmake .. -DhipSYCL_DIR= @@ -50,7 +54,6 @@ if($(HIPSYCL_TARGETS)) target_compile_options(dr_shp INTERFACE --hipsycl-targets='cuda:sm_75') endif() - set(CMAKE_INCLUDE_CURRENT_DIR ON) add_executable( @@ -62,15 +65,11 @@ add_executable( ../common/reduce.cpp ../common/sort.cpp ../common/subrange.cpp ../common/take.cpp ../common/transform.cpp ../common/transform_view.cpp ../common/zip.cpp ../common/zip_local.cpp containers.cpp algorithms.cpp - copy.cpp detail.cpp fill.cpp gemv.cpp transform.cpp) + copy.cpp detail.cpp fill.cpp transform.cpp) add_executable(shp-tests-3 shp-tests.cpp containers-3.cpp copy-3.cpp) -# skeleton for rapid builds of individual tests, feel free to change this -add_executable(shp-quick-test shp-tests.cpp ../common/transform.cpp) -target_compile_definitions(shp-quick-test PRIVATE QUICK_TEST) - -foreach(test-exec IN ITEMS shp-tests shp-tests-3 shp-quick-test) +foreach(test-exec IN ITEMS shp-tests shp-tests-3) target_link_libraries(${test-exec} GTest::gtest_main DR::shp fmt::fmt cxxopts) endforeach() diff --git a/test/distributed-ranges/shp/algorithms.cpp b/test/distributed-ranges/shp/algorithms.cpp index eeea46d5663..3fe5839d6fe 100644 --- a/test/distributed-ranges/shp/algorithms.cpp +++ b/test/distributed-ranges/shp/algorithms.cpp @@ -5,7 +5,7 @@ #include "xhp-tests.hpp" using T = int; -using DV = dr::shp::distributed_vector>; +using DV = experimental::dr::shp::distributed_vector>; using V = std::vector; TEST(ShpTests, InclusiveScan_aligned) { @@ -13,17 +13,17 @@ TEST(ShpTests, InclusiveScan_aligned) { // With execution Policy { - dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> v(n); std::vector lv(n); // Range case, no binary op or init, perfectly aligned for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(dr::shp::par_unseq, v, v); + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v, v); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -33,10 +33,10 @@ TEST(ShpTests, InclusiveScan_aligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), v.begin()); + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), v.begin()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -45,17 +45,17 @@ TEST(ShpTests, InclusiveScan_aligned) { // Without execution policies { - dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> v(n); std::vector lv(n); // Range case, no binary op or init, perfectly aligned for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(v, v); + experimental::dr::shp::inclusive_scan(v, v); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -65,10 +65,10 @@ TEST(ShpTests, InclusiveScan_aligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(v.begin(), v.end(), v.begin()); + experimental::dr::shp::inclusive_scan(v.begin(), v.end(), v.begin()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -81,8 +81,8 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { // With execution policies { - dr::shp::distributed_vector> v(n); - dr::shp::distributed_vector> o( + experimental::dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> o( v.size() * 2); std::vector lv(n); @@ -90,10 +90,10 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(dr::shp::par_unseq, v, o, std::plus<>()); + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v, o, std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -103,11 +103,11 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin(), std::multiplies<>(), 12); - dr::shp::inclusive_scan(dr::shp::par_unseq, v, o, std::multiplies<>(), 12); + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v, o, std::multiplies<>(), 12); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -117,10 +117,10 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), o.begin(), + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), o.begin(), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { @@ -131,11 +131,11 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin(), std::multiplies<>(), 12); - dr::shp::inclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), o.begin(), + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), o.begin(), std::multiplies<>(), 12); for (std::size_t i = 0; i < lv.size(); i++) { @@ -145,8 +145,8 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { // Without execution policies { - dr::shp::distributed_vector> v(n); - dr::shp::distributed_vector> o( + experimental::dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> o( v.size() * 2); std::vector lv(n); @@ -154,10 +154,10 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(v, o, std::plus<>()); + experimental::dr::shp::inclusive_scan(v, o, std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -167,11 +167,11 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin(), std::multiplies<>(), 12); - dr::shp::inclusive_scan(v, o, std::multiplies<>(), 12); + experimental::dr::shp::inclusive_scan(v, o, std::multiplies<>(), 12); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -181,10 +181,10 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(v.begin(), v.end(), o.begin(), std::plus<>()); + experimental::dr::shp::inclusive_scan(v.begin(), v.end(), o.begin(), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -194,11 +194,11 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin(), std::multiplies<>(), 12); - dr::shp::inclusive_scan(v.begin(), v.end(), o.begin(), std::multiplies<>(), + experimental::dr::shp::inclusive_scan(v.begin(), v.end(), o.begin(), std::multiplies<>(), 12); for (std::size_t i = 0; i < lv.size(); i++) { @@ -212,17 +212,17 @@ TEST(ShpTests, ExclusiveScan_aligned) { // With execution Policy { - dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> v(n); std::vector lv(n); // Range case, no binary op or init, perfectly aligned for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(dr::shp::par_unseq, v, v, int(0)); + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v, v, int(0)); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -232,10 +232,10 @@ TEST(ShpTests, ExclusiveScan_aligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), v.begin(), + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), v.begin(), int(0)); for (std::size_t i = 0; i < lv.size(); i++) { @@ -245,17 +245,17 @@ TEST(ShpTests, ExclusiveScan_aligned) { // Without execution policies { - dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> v(n); std::vector lv(n); // Range case, no binary op or init, perfectly aligned for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(v, v, int(0)); + experimental::dr::shp::exclusive_scan(v, v, int(0)); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -265,10 +265,10 @@ TEST(ShpTests, ExclusiveScan_aligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(v.begin(), v.end(), v.begin(), int(0)); + experimental::dr::shp::exclusive_scan(v.begin(), v.end(), v.begin(), int(0)); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -281,8 +281,8 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { // With execution policies { - dr::shp::distributed_vector> v(n); - dr::shp::distributed_vector> o( + experimental::dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> o( v.size() * 2); std::vector lv(n); @@ -290,10 +290,10 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(dr::shp::par_unseq, v, o, int(0), std::plus<>()); + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v, o, int(0), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -303,11 +303,11 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), 12, std::multiplies<>()); - dr::shp::exclusive_scan(dr::shp::par_unseq, v, o, 12, std::multiplies<>()); + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v, o, 12, std::multiplies<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -317,10 +317,10 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), o.begin(), + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), o.begin(), int(0), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { @@ -331,11 +331,11 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(12), std::multiplies<>()); - dr::shp::exclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), o.begin(), + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), o.begin(), int(12), std::multiplies<>()); for (std::size_t i = 0; i < lv.size(); i++) { @@ -345,8 +345,8 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { // Without execution policies { - dr::shp::distributed_vector> v(n); - dr::shp::distributed_vector> o( + experimental::dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> o( v.size() * 2); std::vector lv(n); @@ -354,10 +354,10 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(12)); - dr::shp::exclusive_scan(v, o, int(12), std::plus<>()); + experimental::dr::shp::exclusive_scan(v, o, int(12), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -367,11 +367,11 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), 12, std::multiplies<>()); - dr::shp::exclusive_scan(v, o, 12, std::multiplies<>()); + experimental::dr::shp::exclusive_scan(v, o, 12, std::multiplies<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -381,10 +381,10 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(v.begin(), v.end(), o.begin(), int(0), + experimental::dr::shp::exclusive_scan(v.begin(), v.end(), o.begin(), int(0), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { @@ -395,11 +395,11 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), 12, std::multiplies<>()); - dr::shp::exclusive_scan(v.begin(), v.end(), o.begin(), 12, + experimental::dr::shp::exclusive_scan(v.begin(), v.end(), o.begin(), 12, std::multiplies<>()); for (std::size_t i = 0; i < lv.size(); i++) { @@ -414,16 +414,16 @@ TEST(ShpTests, Sort) { for (std::size_t n : sizes) { std::vector l_v = generate_random(n, 100); - dr::shp::distributed_vector d_v(n); + experimental::dr::shp::distributed_vector d_v(n); - dr::shp::copy(l_v.begin(), l_v.end(), d_v.begin()); + experimental::dr::shp::copy(l_v.begin(), l_v.end(), d_v.begin()); std::sort(l_v.begin(), l_v.end()); - dr::shp::sort(d_v); + experimental::dr::shp::sort(d_v); std::vector d_v_l(n); - dr::shp::copy(d_v.begin(), d_v.end(), d_v_l.begin()); + experimental::dr::shp::copy(d_v.begin(), d_v.end(), d_v_l.begin()); for (std::size_t i = 0; i < l_v.size(); i++) { EXPECT_EQ(l_v[i], d_v_l[i]); diff --git a/test/distributed-ranges/shp/containers-3.cpp b/test/distributed-ranges/shp/containers-3.cpp index b42d7796e2e..6d09b96ae94 100644 --- a/test/distributed-ranges/shp/containers-3.cpp +++ b/test/distributed-ranges/shp/containers-3.cpp @@ -6,8 +6,8 @@ TYPED_TEST_SUITE(DistributedVectorTest, AllocatorTypes); TYPED_TEST(DistributedVectorTest, tests_from_this_file_run_on_3_devices) { - EXPECT_EQ(dr::shp::nprocs(), 3); - EXPECT_EQ(std::size(dr::shp::devices()), 3); + EXPECT_EQ(experimental::dr::shp::nprocs(), 3); + EXPECT_EQ(std::size(experimental::dr::shp::devices()), 3); } TYPED_TEST(DistributedVectorTest, segments_sizes_in_uneven_distribution) { diff --git a/test/distributed-ranges/shp/containers.cpp b/test/distributed-ranges/shp/containers.cpp index f32a4bc9cf5..96af37aa29f 100644 --- a/test/distributed-ranges/shp/containers.cpp +++ b/test/distributed-ranges/shp/containers.cpp @@ -23,7 +23,7 @@ TYPED_TEST(DistributedVectorTest, std::iota(dv.begin(), dv.end(), 20); auto second = dv.begin() + 2; - EXPECT_EQ(second[0], dr::ranges::segments(second)[0][0]); + EXPECT_EQ(second[0], experimental::dr::ranges::segments(second)[0][0]); } TYPED_TEST(DistributedVectorTest, fill_constructor) { @@ -87,7 +87,7 @@ TYPED_TEST(DistributedVectorTest, Iterator) { TYPED_TEST(DistributedVectorTest, Resize) { std::size_t size = 100; typename TestFixture::DistVec dv(size); - dr::shp::iota(dv.begin(), dv.end(), 20); + experimental::dr::shp::iota(dv.begin(), dv.end(), 20); typename TestFixture::LocalVec v(size); std::iota(v.begin(), v.end(), 20); @@ -107,11 +107,11 @@ TYPED_TEST(DistributedVectorTest, Resize) { template class DeviceVectorTest : public testing::Test { public: - using DeviceVec = dr::shp::device_vector; + using DeviceVec = experimental::dr::shp::device_vector; }; TYPED_TEST_SUITE(DeviceVectorTest, AllocatorTypes); TYPED_TEST(DeviceVectorTest, is_remote_contiguous_range) { - static_assert(dr::remote_contiguous_range); + static_assert(experimental::dr::remote_contiguous_range); } diff --git a/test/distributed-ranges/shp/containers.hpp b/test/distributed-ranges/shp/containers.hpp index e14c42b52dc..24d2c46be37 100644 --- a/test/distributed-ranges/shp/containers.hpp +++ b/test/distributed-ranges/shp/containers.hpp @@ -7,6 +7,6 @@ template class DistributedVectorTest : public testing::Test { public: using DistVec = - dr::shp::distributed_vector; + experimental::dr::shp::distributed_vector; using LocalVec = std::vector; }; diff --git a/test/distributed-ranges/shp/copy-3.cpp b/test/distributed-ranges/shp/copy-3.cpp index 877e843871a..b330f8cfd3a 100644 --- a/test/distributed-ranges/shp/copy-3.cpp +++ b/test/distributed-ranges/shp/copy-3.cpp @@ -6,8 +6,8 @@ TYPED_TEST_SUITE(CopyTest, AllocatorTypes); TYPED_TEST(CopyTest, tests_from_this_file_run_on_3_devices) { - EXPECT_EQ(dr::shp::nprocs(), 3); - EXPECT_EQ(rng::size(dr::shp::devices()), 3); + EXPECT_EQ(experimental::dr::shp::nprocs(), 3); + EXPECT_EQ(rng::size(experimental::dr::shp::devices()), 3); } TYPED_TEST(CopyTest, dist2local_wholesegment) { @@ -16,7 +16,7 @@ TYPED_TEST(CopyTest, dist2local_wholesegment) { 7, 8, 9, 10, 11, 12}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0}; - auto ret_it = dr::shp::copy(rng::begin(dist_vec) + 4, + auto ret_it = experimental::dr::shp::copy(rng::begin(dist_vec) + 4, rng::begin(dist_vec) + 8, rng::begin(local_vec)); EXPECT_TRUE(equal(local_vec, typename TestFixture::LocalVec{5, 6, 7, 8})); EXPECT_EQ(ret_it, rng::end(local_vec)); @@ -27,7 +27,7 @@ TYPED_TEST(CopyTest, local2dist_wholesegment) { const typename TestFixture::LocalVec local_vec = {50, 60, 70, 80}; typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - auto ret_it = dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), + auto ret_it = experimental::dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec) + 4); EXPECT_TRUE(equal(dist_vec, typename TestFixture::LocalVec{ 1, 2, 3, 4, 50, 60, 70, 80, 9, 10, 11, 12})); diff --git a/test/distributed-ranges/shp/copy.cpp b/test/distributed-ranges/shp/copy.cpp index 47744e41d95..6076a4e03cb 100644 --- a/test/distributed-ranges/shp/copy.cpp +++ b/test/distributed-ranges/shp/copy.cpp @@ -8,7 +8,7 @@ TYPED_TEST_SUITE(CopyTest, AllocatorTypes); TYPED_TEST(CopyTest, dist2local_async) { const typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0}; - dr::shp::copy_async(rng::begin(dist_vec), rng::end(dist_vec), + experimental::dr::shp::copy_async(rng::begin(dist_vec), rng::end(dist_vec), rng::begin(local_vec)) .wait(); EXPECT_TRUE(equal(local_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5})); @@ -17,7 +17,7 @@ TYPED_TEST(CopyTest, dist2local_async) { TYPED_TEST(CopyTest, local2dist_async) { const typename TestFixture::LocalVec local_vec = {1, 2, 3, 4, 5}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0}; - dr::shp::copy_async(rng::begin(local_vec), rng::end(local_vec), + experimental::dr::shp::copy_async(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec)) .wait(); EXPECT_TRUE(equal(dist_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5})); @@ -26,7 +26,7 @@ TYPED_TEST(CopyTest, local2dist_async) { TYPED_TEST(CopyTest, dist2local_sync) { const typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 9}; - auto ret_it = dr::shp::copy(rng::begin(dist_vec), rng::end(dist_vec), + auto ret_it = experimental::dr::shp::copy(rng::begin(dist_vec), rng::end(dist_vec), rng::begin(local_vec)); EXPECT_TRUE( equal(local_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5, 9})); @@ -36,7 +36,7 @@ TYPED_TEST(CopyTest, dist2local_sync) { TYPED_TEST(CopyTest, local2dist_sync) { const typename TestFixture::LocalVec local_vec = {1, 2, 3, 4, 5}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 9}; - auto ret_it = dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), + auto ret_it = experimental::dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec)); EXPECT_TRUE( equal(dist_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5, 9})); @@ -46,7 +46,7 @@ TYPED_TEST(CopyTest, local2dist_sync) { TYPED_TEST(CopyTest, dist2local_range_sync) { const typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 9}; - auto ret_it = dr::shp::copy(dist_vec, rng::begin(local_vec)); + auto ret_it = experimental::dr::shp::copy(dist_vec, rng::begin(local_vec)); EXPECT_TRUE( equal(local_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5, 9})); EXPECT_EQ(*ret_it, 9); @@ -55,7 +55,7 @@ TYPED_TEST(CopyTest, dist2local_range_sync) { TYPED_TEST(CopyTest, local2dist_range_sync) { const typename TestFixture::LocalVec local_vec = {1, 2, 3, 4, 5}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 9}; - auto ret_it = dr::shp::copy(local_vec, rng::begin(dist_vec)); + auto ret_it = experimental::dr::shp::copy(local_vec, rng::begin(dist_vec)); EXPECT_TRUE( equal(dist_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5, 9})); EXPECT_EQ(*ret_it, 9); @@ -65,10 +65,10 @@ TYPED_TEST(CopyTest, dist2local_async_can_interleave) { const typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 0, 0, 0}; auto event_1 = - dr::shp::copy_async(rng::begin(dist_vec) + 0, rng::begin(dist_vec) + 4, + experimental::dr::shp::copy_async(rng::begin(dist_vec) + 0, rng::begin(dist_vec) + 4, rng::begin(local_vec) + 0); auto event_2 = - dr::shp::copy_async(rng::begin(dist_vec) + 1, rng::begin(dist_vec) + 5, + experimental::dr::shp::copy_async(rng::begin(dist_vec) + 1, rng::begin(dist_vec) + 5, rng::begin(local_vec) + 4); event_1.wait(); event_2.wait(); @@ -80,9 +80,9 @@ TYPED_TEST(CopyTest, local2dist_async_can_interleave) { const typename TestFixture::LocalVec local_vec_1 = {1, 2, 3}; const typename TestFixture::LocalVec local_vec_2 = {4, 5}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0}; - auto event_1 = dr::shp::copy_async( + auto event_1 = experimental::dr::shp::copy_async( rng::begin(local_vec_1), rng::end(local_vec_1), rng::begin(dist_vec)); - auto event_2 = dr::shp::copy_async( + auto event_2 = experimental::dr::shp::copy_async( rng::begin(local_vec_2), rng::end(local_vec_2), rng::begin(dist_vec) + 3); event_1.wait(); event_2.wait(); @@ -94,7 +94,7 @@ TYPED_TEST(CopyTest, dist2local_sliced_bothSides) { 6, 7, 8, 9, 10}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(dist_vec) + 1, rng::end(dist_vec) - 1, + experimental::dr::shp::copy(rng::begin(dist_vec) + 1, rng::end(dist_vec) - 1, rng::begin(local_vec)); EXPECT_TRUE(equal( local_vec, typename TestFixture::LocalVec{2, 3, 4, 5, 6, 7, 8, 9, 0, 0})); @@ -105,7 +105,7 @@ TYPED_TEST(CopyTest, dist2local_sliced_left) { 6, 7, 8, 9, 10}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(dist_vec) + 1, rng::end(dist_vec), + experimental::dr::shp::copy(rng::begin(dist_vec) + 1, rng::end(dist_vec), rng::begin(local_vec)); EXPECT_TRUE(equal(local_vec, typename TestFixture::LocalVec{2, 3, 4, 5, 6, 7, 8, 9, 10, 0})); @@ -116,7 +116,7 @@ TYPED_TEST(CopyTest, dist2local_sliced_right) { 6, 7, 8, 9, 10}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(dist_vec), rng::end(dist_vec) - 1, + experimental::dr::shp::copy(rng::begin(dist_vec), rng::end(dist_vec) - 1, rng::begin(local_vec)); EXPECT_TRUE(equal( local_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5, 6, 7, 8, 9, 0})); @@ -126,7 +126,7 @@ TYPED_TEST(CopyTest, local2dist_sliced_bothSides) { const typename TestFixture::LocalVec local_vec = {2, 3, 4, 5, 6, 7, 8, 9}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), + experimental::dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec) + 1); EXPECT_TRUE(equal( dist_vec, typename TestFixture::LocalVec{0, 2, 3, 4, 5, 6, 7, 8, 9, 0})); @@ -136,7 +136,7 @@ TYPED_TEST(CopyTest, local2dist_sliced_left) { const typename TestFixture::LocalVec local_vec = {2, 3, 4, 5, 6, 7, 8, 9}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), + experimental::dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec) + 2); EXPECT_TRUE(equal( dist_vec, typename TestFixture::LocalVec{0, 0, 2, 3, 4, 5, 6, 7, 8, 9})); @@ -146,7 +146,7 @@ TYPED_TEST(CopyTest, local2dist_sliced_right) { const typename TestFixture::LocalVec local_vec = {2, 3, 4, 5, 6, 7, 8, 9}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), + experimental::dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec)); EXPECT_TRUE(equal( dist_vec, typename TestFixture::LocalVec{2, 3, 4, 5, 6, 7, 8, 9, 0, 0})); diff --git a/test/distributed-ranges/shp/copy.hpp b/test/distributed-ranges/shp/copy.hpp index babf02f9958..6d072a6a271 100644 --- a/test/distributed-ranges/shp/copy.hpp +++ b/test/distributed-ranges/shp/copy.hpp @@ -7,6 +7,6 @@ template class CopyTest : public testing::Test { public: using DistVec = - dr::shp::distributed_vector; + experimental::dr::shp::distributed_vector; using LocalVec = std::vector; }; diff --git a/test/distributed-ranges/shp/detail.cpp b/test/distributed-ranges/shp/detail.cpp index 8ff95b4465a..b02240212c0 100644 --- a/test/distributed-ranges/shp/detail.cpp +++ b/test/distributed-ranges/shp/detail.cpp @@ -3,9 +3,9 @@ // SPDX-License-Identifier: BSD-3-Clause #include "xhp-tests.hpp" -#include +#include -namespace shp = dr::shp; +namespace shp = experimental::dr::shp; TEST(DetailTest, parallel_for) { std::size_t size = 2 * 1024 * 1024; @@ -26,7 +26,7 @@ TEST(DetailTest, parallel_for) { auto dv = dvec.data(); - dr::__detail::parallel_for(q, n, [=](auto i) { + experimental::dr::__detail::parallel_for(q, n, [=](auto i) { sycl::atomic_ref v(dv[i % size]); diff --git a/test/distributed-ranges/shp/fill.cpp b/test/distributed-ranges/shp/fill.cpp index b21b4390d55..d08b2c8c777 100644 --- a/test/distributed-ranges/shp/fill.cpp +++ b/test/distributed-ranges/shp/fill.cpp @@ -6,7 +6,7 @@ template class FillTest : public testing::Test { public: using DistVec = - dr::shp::distributed_vector; + experimental::dr::shp::distributed_vector; using LocalVec = std::vector; }; @@ -18,7 +18,7 @@ TYPED_TEST(FillTest, fill_all) { auto segments = dist_vec.segments(); int value = 1; for (auto &&segment : segments) { - dr::shp::fill(segment.begin(), segment.end(), value); + experimental::dr::shp::fill(segment.begin(), segment.end(), value); } EXPECT_TRUE(equal( dist_vec, typename TestFixture::DistVec{1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); diff --git a/test/distributed-ranges/shp/gemv.cpp b/test/distributed-ranges/shp/gemv.cpp deleted file mode 100644 index 11cc779ff89..00000000000 --- a/test/distributed-ranges/shp/gemv.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#include "xhp-tests.hpp" - -TEST(SparseMatrix, Gemv) { - std::size_t m = 100; - std::size_t k = 100; - - dr::shp::sparse_matrix a( - {m, k}, 0.1f, - dr::shp::block_cyclic({dr::shp::tile::div, dr::shp::tile::div}, - {dr::shp::nprocs(), 1})); - - dr::shp::distributed_vector b(k, 1.f); - dr::shp::distributed_vector c(m, 0.f); - - dr::shp::gemv(c, a, b); - - std::vector c_local(m); - - dr::shp::copy(c.begin(), c.end(), c_local.begin()); - - std::vector c_ref(m, 0.f); - - for (auto &&[index, v] : a) { - auto &&[i, k] = index; - - c_ref[i] += v; - } - - EXPECT_TRUE(fp_equal(c_ref, c_local)) - << fmt::format("Reference:\n {}\nActual:\n {}\n", c_ref, c_local); -} diff --git a/test/distributed-ranges/shp/shp-tests.cpp b/test/distributed-ranges/shp/shp-tests.cpp index d8856337ef5..894b9c0f37e 100644 --- a/test/distributed-ranges/shp/shp-tests.cpp +++ b/test/distributed-ranges/shp/shp-tests.cpp @@ -29,7 +29,7 @@ int main(int argc, char *argv[]) { } const unsigned int dev_num = options["devicesCount"].as(); - auto devices = dr::shp::get_numa_devices(sycl::default_selector_v); + auto devices = experimental::dr::shp::get_numa_devices(sycl::default_selector_v); if (dev_num > 0) { unsigned int i = 0; @@ -39,7 +39,7 @@ int main(int argc, char *argv[]) { devices.resize(dev_num); // if too many devices } - dr::shp::init(devices); + experimental::dr::shp::init(devices); for (auto &device : devices) { std::cout << " Device: " << device.get_info() diff --git a/test/distributed-ranges/shp/transform.cpp b/test/distributed-ranges/shp/transform.cpp index 43b80ae3cd7..ea919562419 100644 --- a/test/distributed-ranges/shp/transform.cpp +++ b/test/distributed-ranges/shp/transform.cpp @@ -6,7 +6,7 @@ template class TransformTest : public testing::Test { public: using DistVec = - dr::shp::distributed_vector; + experimental::dr::shp::distributed_vector; using LocalVec = std::vector; constexpr static const auto add_10_func = [](auto x) { return x + 10; }; }; @@ -16,7 +16,7 @@ TYPED_TEST_SUITE(TransformTest, AllocatorTypes); TYPED_TEST(TransformTest, whole_aligned) { const typename TestFixture::DistVec a = {0, 1, 2, 3, 4}; typename TestFixture::DistVec b = {9, 9, 9, 9, 9}; - auto r = dr::shp::transform(dr::shp::par_unseq, a, rng::begin(b), + auto r = experimental::dr::shp::transform(experimental::dr::shp::par_unseq, a, rng::begin(b), TestFixture::add_10_func); EXPECT_EQ(r.in, a.end()); EXPECT_EQ(r.out, b.end()); @@ -29,7 +29,7 @@ TYPED_TEST(TransformTest, whole_non_aligned) { typename TestFixture::DistVec b = {50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60}; - auto r = dr::shp::transform(dr::shp::par_unseq, a, rng::begin(b), + auto r = experimental::dr::shp::transform(experimental::dr::shp::par_unseq, a, rng::begin(b), TestFixture::add_10_func); EXPECT_EQ(r.in, a.end()); EXPECT_EQ(*r.out, 55); @@ -42,8 +42,8 @@ TYPED_TEST(TransformTest, part_aligned) { const typename TestFixture::DistVec a = {0, 1, 2, 3, 4}; typename TestFixture::DistVec b = {9, 9, 9, 9, 9}; - auto [r_in, r_out] = dr::shp::transform( - dr::shp::par_unseq, rng::subrange(++rng::begin(a), --rng::end(a)), + auto [r_in, r_out] = experimental::dr::shp::transform( + experimental::dr::shp::par_unseq, rng::subrange(++rng::begin(a), --rng::end(a)), ++rng::begin(b), TestFixture::add_10_func); EXPECT_EQ(*r_in, 4); EXPECT_EQ(*r_out, 9); @@ -55,8 +55,8 @@ TYPED_TEST(TransformTest, part_not_aligned) { const typename TestFixture::DistVec a = {0, 1, 2, 3}; typename TestFixture::DistVec b = {9, 9, 9, 9, 9, 9, 9, 9, 9}; - auto [r_in, r_out] = dr::shp::transform( - dr::shp::par_unseq, rng::subrange(++rng::begin(a), rng::end(a)), + auto [r_in, r_out] = experimental::dr::shp::transform( + experimental::dr::shp::par_unseq, rng::subrange(++rng::begin(a), rng::end(a)), rng::begin(b) + 5, TestFixture::add_10_func); EXPECT_EQ(r_in, a.end()); EXPECT_EQ(r_out, rng::begin(b) + 8); // initial shift in b + subrange size @@ -67,7 +67,7 @@ TYPED_TEST(TransformTest, part_not_aligned) { TYPED_TEST(TransformTest, inplace_whole) { typename TestFixture::DistVec a = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - auto [r_in, r_out] = dr::shp::transform(dr::shp::par_unseq, a, rng::begin(a), + auto [r_in, r_out] = experimental::dr::shp::transform(experimental::dr::shp::par_unseq, a, rng::begin(a), TestFixture::add_10_func); EXPECT_EQ(r_in, rng::end(a)); EXPECT_EQ(r_out, rng::end(a)); @@ -77,8 +77,8 @@ TYPED_TEST(TransformTest, inplace_whole) { TYPED_TEST(TransformTest, inplace_part) { typename TestFixture::DistVec a = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - auto [r_in, r_out] = dr::shp::transform( - dr::shp::par_unseq, rng::subrange(++rng::begin(a), --rng::end(a)), + auto [r_in, r_out] = experimental::dr::shp::transform( + experimental::dr::shp::par_unseq, rng::subrange(++rng::begin(a), --rng::end(a)), ++rng::begin(a), TestFixture::add_10_func); EXPECT_EQ(*r_in, 8); EXPECT_EQ(r_out, --rng::end(a)); @@ -89,7 +89,7 @@ TYPED_TEST(TransformTest, inplace_part) { TYPED_TEST(TransformTest, large_aligned_whole) { const typename TestFixture::DistVec a(12345, 7); typename TestFixture::DistVec b(12345, 3); - dr::shp::transform(dr::shp::par_unseq, a, rng::begin(b), + experimental::dr::shp::transform(experimental::dr::shp::par_unseq, a, rng::begin(b), TestFixture::add_10_func); EXPECT_EQ(b[0], 17); @@ -110,7 +110,7 @@ TYPED_TEST(TransformTest, large_aligned_whole) { TYPED_TEST(TransformTest, large_aligned_part) { const typename TestFixture::DistVec a(12345, 7); typename TestFixture::DistVec b(12345, 3); - dr::shp::transform(dr::shp::par_unseq, + experimental::dr::shp::transform(experimental::dr::shp::par_unseq, rng::subrange(rng::begin(a) + 1000, rng::begin(a) + 1005), rng::begin(b) + 1000, TestFixture::add_10_func); @@ -127,7 +127,7 @@ TYPED_TEST(TransformTest, large_aligned_part) { TYPED_TEST(TransformTest, large_aligned_part_shifted) { const typename TestFixture::DistVec a(12345, 7); typename TestFixture::DistVec b(12345, 3); - dr::shp::transform(dr::shp::par_unseq, + experimental::dr::shp::transform(experimental::dr::shp::par_unseq, rng::subrange(rng::begin(a) + 1000, rng::begin(a) + 1005), rng::begin(b) + 999, TestFixture::add_10_func); @@ -144,7 +144,7 @@ TYPED_TEST(TransformTest, large_aligned_part_shifted) { TYPED_TEST(TransformTest, large_not_aligned) { const typename TestFixture::DistVec a(10000, 7); typename TestFixture::DistVec b(17000, 3); - dr::shp::transform(dr::shp::par_unseq, + experimental::dr::shp::transform(experimental::dr::shp::par_unseq, rng::subrange(rng::begin(a) + 2000, rng::begin(a) + 9000), rng::begin(b) + 9000, TestFixture::add_10_func); @@ -164,8 +164,8 @@ TYPED_TEST(TransformTest, large_not_aligned) { TYPED_TEST(TransformTest, large_inplace) { typename TestFixture::DistVec a(77000, 7); - auto r = dr::shp::transform( - dr::shp::par_unseq, + auto r = experimental::dr::shp::transform( + experimental::dr::shp::par_unseq, rng::subrange(rng::begin(a) + 22222, rng::begin(a) + 55555), rng::begin(a) + 22222, TestFixture::add_10_func); diff --git a/test/distributed-ranges/shp/xhp-tests.hpp b/test/distributed-ranges/shp/xhp-tests.hpp index 2ab612d9635..5b02a6b2de6 100644 --- a/test/distributed-ranges/shp/xhp-tests.hpp +++ b/test/distributed-ranges/shp/xhp-tests.hpp @@ -4,11 +4,11 @@ #pragma once #include "cxxopts.hpp" -#include -#include #include #include #include +#include +// #include #define TEST_SHP @@ -17,20 +17,21 @@ const std::size_t comm_rank = 0; const std::size_t comm_size = 1; // Namespace aliases and wrapper functions to make the tests uniform -namespace xhp = dr::shp; +namespace xhp = experimental::dr::shp; inline void barrier() {} inline void fence() {} inline void fence_on(auto &&) {} -using AllocatorTypes = ::testing::Types>; +using AllocatorTypes = + ::testing::Types>; template concept compliant_view = rng::forward_range && requires(V &v) { - dr::ranges::segments(v); - dr::ranges::rank(dr::ranges::segments(v)[0]); + experimental::dr::ranges::segments(v); + experimental::dr::ranges::rank(experimental::dr::ranges::segments(v)[0]); }; -#include "common-tests.hpp" +#include "../include/common-tests.hpp" -using AllTypes = ::testing::Types>; +using AllTypes = ::testing::Types>; \ No newline at end of file diff --git a/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp b/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp index f5294f2e382..e24c4c00522 100644 --- a/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp +++ b/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp @@ -492,6 +492,7 @@ main() bool bProcessed = false; #if TEST_DYNAMIC_SELECTION_AVAILABLE +#if !ONEDPL_FPGA_DEVICE || !ONEDPL_FPGA_EMULATOR using policy_t = oneapi::dpl::experimental::auto_tune_policy; std::vector u; build_auto_tune_universe(u); @@ -542,6 +543,7 @@ main() bProcessed = true; } +#endif // Devices available are CPU and GPU #endif // TEST_DYNAMIC_SELECTION_AVAILABLE return TestUtils::done(bProcessed); diff --git a/test/parallel_api/dynamic_selection/sycl/test_dynamic_load_policy_sycl.pass.cpp b/test/parallel_api/dynamic_selection/sycl/test_dynamic_load_policy_sycl.pass.cpp index 17d6de6aa4f..b473892af19 100644 --- a/test/parallel_api/dynamic_selection/sycl/test_dynamic_load_policy_sycl.pass.cpp +++ b/test/parallel_api/dynamic_selection/sycl/test_dynamic_load_policy_sycl.pass.cpp @@ -47,6 +47,7 @@ main() bool bProcessed = false; #if TEST_DYNAMIC_SELECTION_AVAILABLE +#if !ONEDPL_FPGA_DEVICE || !ONEDPL_FPGA_EMULATOR using policy_t = oneapi::dpl::experimental::dynamic_load_policy; std::vector u; build_dl_universe(u); @@ -76,6 +77,7 @@ main() bProcessed = true; } +#endif // Devices available are CPU and GPU #endif // TEST_DYNAMIC_SELECTION_AVAILABLE return TestUtils::done(bProcessed); diff --git a/test/parallel_api/experimental/for_loop.pass.cpp b/test/parallel_api/experimental/for_loop.pass.cpp index 8c59ffe1afd..725d2bc9747 100644 --- a/test/parallel_api/experimental/for_loop.pass.cpp +++ b/test/parallel_api/experimental/for_loop.pass.cpp @@ -296,8 +296,9 @@ test_for_loop() Sequence in_out(n, Gen()); Sequence expected = in_out; - invoke_on_all_policies<>()(test_for_loop_impl(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), - in_out.size()); + // for_loop staff is implemented for the host policies only + invoke_on_all_host_policies()(test_for_loop_impl(), in_out.begin(), in_out.end(), expected.begin(), + expected.end(), in_out.size()); } } @@ -313,8 +314,9 @@ test_for_loop_strided() ::std::vector strides = {1, 2, 10, n > 1 ? n - 1 : 1, n > 0 ? n : 1, n + 1}; for (size_t stride : strides) { - invoke_on_all_policies<>()(test_for_loop_strided_impl(), in_out.begin(), in_out.end(), expected.begin(), - expected.end(), in_out.size(), stride); + // for_loop staff is implemented for the host policies only + invoke_on_all_host_policies()(test_for_loop_strided_impl(), in_out.begin(), in_out.end(), expected.begin(), + expected.end(), in_out.size(), stride); } } } diff --git a/test/parallel_api/experimental/for_loop_induction.pass.cpp b/test/parallel_api/experimental/for_loop_induction.pass.cpp index 505ceb54e1e..e01569c58a1 100644 --- a/test/parallel_api/experimental/for_loop_induction.pass.cpp +++ b/test/parallel_api/experimental/for_loop_induction.pass.cpp @@ -153,8 +153,10 @@ test() { Sequence in_out(n, [](long int k) { return T(k % 5 != 1 ? 3 * k - 7 : 0); }); Sequence expected = in_out; - invoke_on_all_policies<>()(test_body(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), - in_out.size()); + + // for_loop staff is implemented for the host policies only + invoke_on_all_host_policies()(test_body(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), + in_out.size()); } } diff --git a/test/parallel_api/experimental/for_loop_reduction.pass.cpp b/test/parallel_api/experimental/for_loop_reduction.pass.cpp index cfd918ff2bd..7f6b2b9e7f5 100644 --- a/test/parallel_api/experimental/for_loop_reduction.pass.cpp +++ b/test/parallel_api/experimental/for_loop_reduction.pass.cpp @@ -77,8 +77,10 @@ test() { Sequence in_out(n, [](long int k) { return T(k % 5 != 1 ? 3 * k - 7 : 0); }); Sequence expected = in_out; - invoke_on_all_policies<>()(test_body(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), - in_out.size()); + + // for_loop staff is implemented for the host policies only + invoke_on_all_host_policies()(test_body(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), + in_out.size()); } } @@ -176,10 +178,12 @@ test_predefined(::std::initializer_list init_list) // Just arbitrary numbers Sequence in_out = init_list; Sequence expected = in_out; - invoke_on_all_policies<>()(test_body_predefined(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), - in_out.size()); - invoke_on_all_policies<>()(test_body_predefined_bits(), in_out.begin(), in_out.end(), expected.begin(), - expected.end(), in_out.size()); + + // for_loop staff is implemented for the host policies only + invoke_on_all_host_policies()(test_body_predefined(), in_out.begin(), in_out.end(), expected.begin(), + expected.end(), in_out.size()); + invoke_on_all_host_policies()(test_body_predefined_bits(), in_out.begin(), in_out.end(), expected.begin(), + expected.end(), in_out.size()); } void diff --git a/test/support/utils_sycl.h b/test/support/utils_sycl.h index d88eee142fa..b2f351d716b 100644 --- a/test/support/utils_sycl.h +++ b/test/support/utils_sycl.h @@ -106,7 +106,7 @@ make_new_policy(_Policy&& __policy) #if ONEDPL_FPGA_DEVICE inline auto default_selector = # if ONEDPL_FPGA_EMULATOR - sycl::ext::intel::fpga_emulator_selector{}; + sycl::ext::intel::fpga_emulator_selector_v; # else sycl::ext::intel::fpga_selector{}; # endif // ONEDPL_FPGA_EMULATOR