From 9d1cdd179ed27d49a9920eed403cde71a9c57f4e Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 13 Mar 2024 12:29:54 +0100 Subject: [PATCH 01/29] [oneDPL] Avoid `__brick_reverse` and `__brick_reverse_copy` calls for empty source data (#1442) --- include/oneapi/dpl/pstl/algorithm_impl.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 628e7ad09b8..7d3e07c4ec0 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1663,10 +1663,10 @@ void __brick_reverse(_BidirectionalIterator __first, _BidirectionalIterator __last, _BidirectionalIterator __d_last, /*is_vector=*/::std::false_type) noexcept { - for (--__d_last; __first != __last; ++__first, --__d_last) + for (; __first != __last; ++__first) { using ::std::iter_swap; - iter_swap(__first, __d_last); + iter_swap(__first, --__d_last); } } @@ -1699,6 +1699,9 @@ oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> __pattern_reverse(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { + if (__first == __last) + return; + __par_backend::__parallel_for( ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, [__is_vector, __first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { @@ -1744,6 +1747,10 @@ __pattern_reverse_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first _RandomAccessIterator2 __d_first, _IsVector __is_vector, /*is_parallel=*/::std::true_type) { auto __len = __last - __first; + + if (__len == 0) + return __d_first; + __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__is_vector, __first, __len, __d_first](_RandomAccessIterator1 __inner_first, _RandomAccessIterator1 __inner_last) { From 440e6738f12c17e75bc2399c76781f592b6bce44 Mon Sep 17 00:00:00 2001 From: "Mateusz P. Nowak" Date: Thu, 14 Mar 2024 15:00:21 +0000 Subject: [PATCH 02/29] cleanups --- CMakeLists.txt | 8 + include/oneapi/dpl/distributed-ranges | 5 + .../concepts/concepts.hpp | 2 +- .../detail/communicator.hpp | 300 ---------------- .../detail/enumerate.hpp | 2 +- .../detail/iterator_adaptor.hpp | 2 +- .../detail/mdarray_shim.hpp | 7 - .../detail/mdspan_shim.hpp | 9 - .../detail/mdspan_utils.hpp | 335 ------------------ .../distributed_ranges_impl/detail/memory.hpp | 104 ------ .../detail/normal_distributed_iterator.hpp | 123 ------- .../detail/onedpl_direct_iterator.hpp | 2 +- .../detail/owning_view.hpp | 2 +- .../distributed_ranges_impl/detail/ranges.hpp | 2 +- .../detail/ranges_utils.hpp | 21 -- .../detail/remote_subrange.hpp | 4 +- .../detail/segments_tools.hpp | 10 +- .../detail/sycl_utils.hpp | 2 +- .../detail/tuple_utils.hpp | 28 -- .../shp/algorithms/copy.hpp | 10 +- .../shp/algorithms/exclusive_scan.hpp | 16 +- .../shp/algorithms/fill.hpp | 10 +- .../shp/algorithms/for_each.hpp | 12 +- .../shp/algorithms/inclusive_scan.hpp | 16 +- .../shp/algorithms/iota.hpp | 8 +- .../shp/algorithms/matrix/gemm.hpp | 4 +- .../shp/algorithms/matrix/gemv.hpp | 18 +- .../shp/algorithms/matrix/local_gemm.hpp | 2 +- .../shp/algorithms/matrix/local_gemv.hpp | 6 +- .../algorithms/matrix/matrix_algorithms.hpp | 4 +- .../shp/algorithms/reduce.hpp | 8 +- .../shp/algorithms/sort.hpp | 6 +- .../shp/algorithms/transform.hpp | 6 +- .../shp/allocators.hpp | 2 +- .../containers/distributed_dense_matrix.hpp | 16 +- .../shp/containers/duplicated_vector.hpp | 4 +- .../shp/containers/matrix_entry.hpp | 2 +- .../shp/containers/matrix_partition.hpp | 6 +- .../containers/sequential/dense_matrix.hpp | 12 +- .../shp/containers/sparse_matrix.hpp | 18 +- .../distributed_ranges_impl/shp/detail.hpp | 8 +- .../shp/device_ptr.hpp | 2 +- .../shp/device_ref.hpp | 2 +- .../shp/device_span.hpp | 4 +- .../shp/device_vector.hpp | 4 +- .../shp/distributed_span.hpp | 10 +- .../shp/distributed_vector.hpp | 10 +- .../distributed_ranges_impl/shp/future.hpp | 2 +- .../distributed_ranges_impl/shp/init.hpp | 4 +- .../distributed_ranges_impl/shp/range.hpp | 4 +- .../shp/range_adaptors.hpp | 4 +- .../distributed_ranges_impl/shp/span.hpp | 2 +- .../shp/util/coo_matrix.hpp | 2 +- .../shp/util/generate_random.hpp | 2 +- .../shp/util/matrix_io.hpp | 4 +- .../shp/views/csr_matrix_view.hpp | 4 +- .../shp/views/dense_column_view.hpp | 4 +- .../shp/views/dense_matrix_iterator.hpp | 10 +- .../shp/views/dense_matrix_view.hpp | 14 +- .../shp/views/dense_row_view.hpp | 6 +- .../shp/views/enumerate.hpp | 2 +- .../shp/views/standard_views.hpp | 12 +- .../shp/views/views.hpp | 8 +- .../distributed_ranges_impl/shp/zip_view.hpp | 10 +- .../views/transform.hpp | 4 +- .../distributed_ranges_impl/views/views.hpp | 4 +- test/distributed-ranges/shp/CMakeLists.txt | 11 +- test/distributed-ranges/shp/detail.cpp | 2 +- test/distributed-ranges/shp/xhp-tests.hpp | 8 +- 69 files changed, 205 insertions(+), 1112 deletions(-) delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/detail/communicator.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdarray_shim.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_shim.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_utils.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/detail/memory.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/detail/normal_distributed_iterator.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges_utils.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/detail/tuple_utils.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 61927a3aca9..0ed8edfaf65 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -311,6 +311,14 @@ if (ONEDPL_BACKEND MATCHES "^(tbb|dpcpp|dpcpp_only)$") $<$,$>:-fsycl-link> ${ONEDPL_AOT_OPTIONS} ) + + # if C++20 or newer, include Distributed Ranges (experimental) + if (CMAKE_CXX_STANDARD GREATER_EQUAL 20) + set(ONEDPL_USE_DR TRUE) + message(STATUS "Adding Distributed Ranges to the project") + else() + message(STATUS "Distributed Ranges not supported by the compiler") + endif() endif() elseif(ONEDPL_BACKEND MATCHES "^(serial)$") diff --git a/include/oneapi/dpl/distributed-ranges b/include/oneapi/dpl/distributed-ranges index dadf6aa73a3..f466b407737 100644 --- a/include/oneapi/dpl/distributed-ranges +++ b/include/oneapi/dpl/distributed-ranges @@ -13,6 +13,11 @@ #include "oneapi/dpl/internal/common_config.h" #include "oneapi/dpl/pstl/onedpl_config.h" +// #if _ONEDPL_BACKEND_SYCL != 0 && __INTEL_LLVM_COMPILER >= 20230000 +#if defined(ONEDPL_USE_DISTRIBUTED_RANGES) #include "oneapi/dpl/internal/distributed_ranges_impl/shp.hpp" +#else +#error "Compiler does not support Distributed Ranges" +#endif #endif /* _ONEDPL_DISTRIBUTED_RANGES */ diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp index 074f7f35501..c0b54ead437 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/communicator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/communicator.hpp deleted file mode 100644 index 596a298bae8..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/communicator.hpp +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -namespace dr { - -class communicator { -public: - communicator(MPI_Comm comm = MPI_COMM_WORLD) : mpi_comm_(comm) { - int rank, size; - MPI_Comm_rank(comm, &rank); - MPI_Comm_size(comm, &size); - rank_ = rank; - size_ = size; - } - - auto size() const { return size_; } - auto rank() const { return rank_; } - auto prev() const { return (rank() + size() - 1) % size(); } - auto next() const { return (rank() + 1) % size(); } - auto first() const { return rank() == 0; } - auto last() const { return rank() == size() - 1; } - - MPI_Comm mpi_comm() const { return mpi_comm_; } - - void barrier() const { -#ifdef DRISHMEM - DRLOG("calling COMM barrier (by calling fence) in ISHMEM"); - ishmem_fence(); -#endif - DRLOG("calling COMM barrier in MPI"); - MPI_Barrier(mpi_comm_); - DRLOG("COMM barrier finished"); - } - - void bcast(void *src, std::size_t count, std::size_t root) const { - MPI_Bcast(src, count, MPI_BYTE, root, mpi_comm_); - } - - void scatter(const void *src, void *dst, std::size_t count, - std::size_t root) const { - MPI_Scatter(src, count, MPI_BYTE, dst, count, MPI_BYTE, root, mpi_comm_); - } - - template - void scatter(const std::span src, T &dst, std::size_t root) const { - assert(rng::size(src) >= size_); - scatter(rng::data(src), &dst, sizeof(T), root); - } - - void scatterv(const void *src, int *counts, int *offsets, void *dst, - int dst_count, std::size_t root) const { - assert(counts == nullptr || counts[rank()] == dst_count); - MPI_Scatterv(src, counts, offsets, MPI_BYTE, dst, dst_count, MPI_BYTE, root, - mpi_comm_); - } - - void gather(const void *src, void *dst, std::size_t count, - std::size_t root) const { - MPI_Gather_c(src, count, MPI_BYTE, dst, count, MPI_BYTE, root, mpi_comm_); - } - - template - void gather(const T &src, std::span dst, std::size_t root) const { - assert(rng::size(dst) >= size_); - gather(&src, rng::data(dst), sizeof(T), root); - } - - template - void all_gather(const T *src, T *dst, std::size_t count) const { - // Gather size elements from each rank - MPI_Allgather_c(src, count * sizeof(T), MPI_BYTE, dst, count * sizeof(T), - MPI_BYTE, mpi_comm_); - } - - template - void all_gather(const T &src, std::vector &dst) const { - assert(rng::size(dst) >= size_); - all_gather(&src, rng::data(dst), 1); - } - - template - void all_gather(const R &src, R &dst) const { - assert(rng::size(dst) >= size_ * rng::size(src)); - all_gather(rng::data(src), rng::data(dst), rng::size(src)); - } - - template - void i_all_gather(const T *src, T *dst, std::size_t count, - MPI_Request *req) const { - // Gather size elements from each rank - MPI_Iallgather_c(src, count * sizeof(T), MPI_BYTE, dst, count * sizeof(T), - MPI_BYTE, mpi_comm_, req); - } - - template - void i_all_gather(const T &src, std::vector &dst, MPI_Request *req) const { - assert(rng::size(dst) >= size_); - i_all_gather(&src, rng::data(dst), 1, req); - } - - void gatherv(const void *src, int *counts, int *offsets, void *dst, - std::size_t root) const { - MPI_Gatherv(src, counts[rank()], MPI_BYTE, dst, counts, offsets, MPI_BYTE, - root, mpi_comm_); - } - - // pointer with explicit tag - template - void isend(const T *data, std::size_t count, std::size_t dst_rank, auto tag, - MPI_Request *request) const { - MPI_Isend_c(data, count * sizeof(T), MPI_BYTE, dst_rank, int(tag), - mpi_comm_, request); - } - - // pointer, no tag - template - void isend(const T *data, std::size_t count, std::size_t dst_rank, - MPI_Request *request) const { - isend(data, count, dst_rank, 0, request); - } - - // range and tag - template - void isend(const R &data, std::size_t dst_rank, auto tag, - MPI_Request *request) const { - isend(rng::data(data), rng::size(data), dst_rank, tag, request); - } - - // range, no tag - template - void isend(const R &data, std::size_t dst_rank, MPI_Request *request) const { - isend(data, dst_rank, 0, request); - } - - // pointer and tag - template - void irecv(T *data, std::size_t size, std::size_t src_rank, auto tag, - MPI_Request *request) const { - MPI_Irecv_c(data, size * sizeof(T), MPI_BYTE, src_rank, int(tag), mpi_comm_, - request); - } - - // pointer, no tag - template - void irecv(T *data, std::size_t size, std::size_t src_rank, - MPI_Request *request) const { - irecv(data, size, src_rank, 0, request); - } - - // range and tag - template - void irecv(R &data, std::size_t src_rank, int tag, - MPI_Request *request) const { - irecv(rng::data(data), rng::size(data), src_rank, tag, request); - } - - // range, no tag - template - void irecv(R &data, std::size_t src_rank, MPI_Request *request) const { - irecv(data, src_rank, 0, request); - } - - template - void alltoall(const R &sendr, R &recvr, std::size_t count) { - alltoall(rng::data(sendr), rng::data(recvr), count); - } - - template - void alltoall(const T *send, T *receive, std::size_t count) { - std::size_t bytes = count * sizeof(T); - - timer time; - MPI_Alltoall_c(send, bytes, MPI_BYTE, receive, bytes, MPI_BYTE, mpi_comm_); - dr::drlog.debug(dr::logger::mpi, "alltoall bytes: {} elapsed: {}\n", bytes, - time.elapsed()); - } - - template - void alltoallv(const SendR &sendbuf, const std::vector &sendcnt, - const std::vector &senddsp, RecvR &recvbuf, - const std::vector &recvcnt, - const std::vector &recvdsp) { - using valT = typename RecvR::value_type; - - static_assert(std::is_same_v, - std::ranges::range_value_t>); - - assert(rng::size(sendcnt) == size_); - assert(rng::size(senddsp) == size_); - assert(rng::size(recvcnt) == size_); - assert(rng::size(recvdsp) == size_); - - std::vector _sendcnt(size_); - std::vector _senddsp(size_); - std::vector _recvcnt(size_); - std::vector _recvdsp(size_); - - rng::transform(sendcnt, _sendcnt.begin(), - [](auto e) { return e * sizeof(valT); }); - rng::transform(senddsp, _senddsp.begin(), - [](auto e) { return e * sizeof(valT); }); - rng::transform(recvcnt, _recvcnt.begin(), - [](auto e) { return e * sizeof(valT); }); - rng::transform(recvdsp, _recvdsp.begin(), - [](auto e) { return e * sizeof(valT); }); - - MPI_Alltoallv(rng::data(sendbuf), rng::data(_sendcnt), rng::data(_senddsp), - MPI_BYTE, rng::data(recvbuf), rng::data(_recvcnt), - rng::data(_recvdsp), MPI_BYTE, mpi_comm_); - } - - bool operator==(const communicator &other) const { - return mpi_comm_ == other.mpi_comm_; - } - -private: - MPI_Comm mpi_comm_; - std::size_t rank_; - std::size_t size_; -}; - -class rma_window { -public: - void create(communicator comm, void *data, std::size_t size) { - local_data_ = data; - communicator_ = comm; - DRLOG("win create:: size: {} data:{}", size, data); - MPI_Win_create(data, size, 1, MPI_INFO_NULL, comm.mpi_comm(), &win_); - } - - template auto local_data() { - return static_cast(local_data_); - } - - void free() { MPI_Win_free(&win_); } - - bool operator==(const rma_window other) const noexcept { - return this->win_ == other.win_; - } - - void set_null() { win_ = MPI_WIN_NULL; } - bool null() const noexcept { return win_ == MPI_WIN_NULL; } - - template T get(std::size_t rank, std::size_t disp) const { - T dst; - get(&dst, sizeof(T), rank, disp * sizeof(T)); - return dst; - } - - void get(void *dst, std::size_t size, std::size_t rank, - std::size_t disp) const { - DRLOG("MPI comm get:: ({}:{}:{})", rank, disp, size); - MPI_Request request; - MPI_Rget(dst, size, MPI_BYTE, rank, disp, size, MPI_BYTE, win_, &request); - MPI_Wait(&request, MPI_STATUS_IGNORE); - } - - void put(const auto &src, std::size_t rank, std::size_t disp) const { - put(&src, sizeof(src), rank, disp * sizeof(src)); - } - - void put(const void *src, std::size_t size, std::size_t rank, - std::size_t disp) const { - DRLOG("MPI comm put:: ({}:{}:{})", rank, disp, size); - MPI_Request request; - MPI_Rput(src, size, MPI_BYTE, rank, disp, size, MPI_BYTE, win_, &request); - DRLOG("MPI comm wait:: ({}:{}:{})", rank, disp, size); - MPI_Wait(&request, MPI_STATUS_IGNORE); - DRLOG("MPI comm wait finished:: ({}:{}:{})", rank, disp, size); - } - - void fence() const { - if (win_ != MPI_WIN_NULL) { - DRLOG("MPI comm fence:: win:{}", win_); - MPI_Win_fence(0, win_); - DRLOG("MPI comm fence finished:: win:{}", win_); - } else { - DRLOG("MPI comm fence skipped because win is NULL"); - } - } - - void flush(std::size_t rank) const { - DRLOG("MPI comm flush:: rank:{} win:{}", rank, win_); - MPI_Win_flush(rank, win_); - DRLOG("MPI comm flush finished:: rank:{} win:{}", rank, win_); - } - - const auto &communicator() const { return communicator_; } - auto mpi_win() { return win_; } - -private: - dr::communicator communicator_; - MPI_Win win_ = MPI_WIN_NULL; - void *local_data_ = nullptr; -}; - -} // namespace dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp index 435282469ca..f452d67a9e6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp index 9b287e6602d..6747ef9a548 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp @@ -7,7 +7,7 @@ #include #include -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdarray_shim.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdarray_shim.hpp deleted file mode 100644 index eff112d0d79..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdarray_shim.hpp +++ /dev/null @@ -1,7 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_shim.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_shim.hpp deleted file mode 100644 index 3b0230e9b75..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_shim.hpp +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#define MDSPAN_NAMESPACE std::experimental -namespace md = MDSPAN_NAMESPACE; diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_utils.hpp deleted file mode 100644 index d23c92a9b7b..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/mdspan_utils.hpp +++ /dev/null @@ -1,335 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -namespace dr::__detail { - -template auto dims(md::dextents extents) { - if constexpr (Rank == 1) { - return std::tuple(extents.extent(0)); - } else if constexpr (Rank == 2) { - return std::tuple(extents.extent(0), extents.extent(1)); - } else if constexpr (Rank == 3) { - return std::tuple(extents.extent(0), extents.extent(1), extents.extent(2)); - } else { - assert(false); - } -} - -template auto shape_to_strides(const Index &shape) { - const std::size_t rank = rng::size(shape); - Index strides; - strides[rank - 1] = 1; - for (std::size_t i = 1; i < rank; i++) { - strides[rank - i - 1] = strides[rank - i] * shape[rank - i]; - } - return strides; -} - -template -auto linear_to_index(std::size_t linear, const Index &shape) { - Index index, strides(shape_to_strides(shape)); - - for (std::size_t i = 0; i < rng::size(shape); i++) { - index[i] = linear / strides[i]; - linear = linear % strides[i]; - } - - return index; -} - -template -concept mdspan_like = requires(Mdspan &mdspan) { - mdspan.rank(); - mdspan.extents(); -}; - -template -concept mdarray_like = requires(Mdarray &mdarray) { mdarray.to_mdspan(); }; - -template using dr_extents = std::array; -template using md_extents = md::dextents; - -// -// Mdspan accessor using an iterator -// -template class mdspan_iter_accessor { -public: - using data_handle_type = Iter; - using reference = std::iter_reference_t; - using offset_policy = mdspan_iter_accessor; - - constexpr mdspan_iter_accessor() noexcept = default; - constexpr auto access(Iter iter, std::size_t index) const { - return iter[index]; - } - - constexpr auto offset(Iter iter, std::size_t index) const noexcept { - return iter + index; - } -}; - -template -auto make_submdspan_impl(M mdspan, const dr_extents &starts, - const dr_extents &ends, - std::index_sequence) { - return md::submdspan(mdspan, std::tuple(starts[indexes], ends[indexes])...); -} - -// Mdspan accepts slices, but that is hard to work with because it -// requires parameter packs. Work with starts/size vectors internally -// and use slices at the interface -template -auto make_submdspan(auto mdspan, const std::array &starts, - const std::array &ends) { - return make_submdspan_impl(mdspan, starts, ends, - std::make_index_sequence{}); -} - -template -void mdspan_foreach(md_extents extents, Op op, - dr_extents index = dr_extents(), - std::size_t rank = 0) { - for (index[rank] = 0; index[rank] < extents.extent(rank); index[rank]++) { - if (rank == Rank - 1) { - op(index); - } else { - mdspan_foreach(extents, op, index, rank + 1); - } - } -} - -// Pack mdspan into contiguous container -template -auto mdspan_copy(Src src, std::forward_iterator auto dst) { - __detail::event event; - - constexpr std::size_t rank = std::remove_cvref_t::rank(); - if (rank >= 2 && rank <= 3 && mhp::use_sycl()) { -#ifdef SYCL_LANGUAGE_VERSION - constexpr std::size_t rank = std::remove_cvref_t::rank(); - if constexpr (rank == 2) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), sycl::range(src.extent(0), src.extent(1)), - [src, dst](auto idx) { - dst[idx[0] * src.extent(1) + idx[1]] = src(idx); - }); - } else if constexpr (rank == 3) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), - sycl::range(src.extent(0), src.extent(1), src.extent(2)), - [src, dst](auto idx) { - dst[idx[0] * src.extent(1) * src.extent(2) + - idx[1] * src.extent(2) + idx[2]] = src(idx); - }); - } else { - assert(false); - } -#endif - } else { - auto pack = [src, &dst](auto index) { *dst++ = src(index); }; - mdspan_foreach(src.extents(), pack); - } - - return event; -} - -// unpack contiguous container into mdspan -template -auto mdspan_copy(std::forward_iterator auto src, Dst dst) { - __detail::event event; - - constexpr std::size_t rank = std::remove_cvref_t::rank(); - if (rank >= 2 && rank <= 3 && mhp::use_sycl()) { -#ifdef SYCL_LANGUAGE_VERSION - if constexpr (rank == 2) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), sycl::range(dst.extent(0), dst.extent(1)), - [src, dst](auto idx) { - dst(idx) = src[idx[0] * dst.extent(1) + idx[1]]; - }); - } else if constexpr (rank == 3) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), - sycl::range(dst.extent(0), dst.extent(1), dst.extent(2)), - [src, dst](auto idx) { - dst(idx) = src[idx[0] * dst.extent(1) * dst.extent(2) + - idx[1] * dst.extent(2) + idx[2]]; - }); - } else { - assert(false); - } -#endif - } else { - auto unpack = [&src, dst](auto index) { dst(index) = *src++; }; - mdspan_foreach(dst.extents(), unpack); - } - - return event; -} - -// copy mdspan to mdspan -auto mdspan_copy(mdspan_like auto src, mdspan_like auto dst) { - __detail::event event; - - assert(src.extents() == dst.extents()); - - constexpr std::size_t rank = std::remove_cvref_t::rank(); - if (rank >= 2 && rank <= 3 && mhp::use_sycl()) { -#ifdef SYCL_LANGUAGE_VERSION - dr::drlog.debug("mdspan_copy using sycl\n"); - if constexpr (rank == 2) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), sycl::range(dst.extent(0), dst.extent(1)), - [src, dst](auto idx) { dst(idx) = src(idx); }); - } else if constexpr (rank == 3) { - event = dr::__detail::parallel_for( - dr::mhp::sycl_queue(), - sycl::range(dst.extent(0), dst.extent(1), dst.extent(2)), - [src, dst](auto idx) { dst(idx) = src(idx); }); - } else { - assert(false); - } -#endif - } else { - - auto copy = [src, dst](auto index) { dst(index) = src(index); }; - mdspan_foreach(src.extents(), copy); - } - - return event; -} - -// For operator(), rearrange indices according to template arguments. -// -// For mdtranspose a(b); -// -// a(1, 2, 3) references b(3, 1, 2) -// -template -class mdtranspose : public Mdspan { -private: - static constexpr std::size_t rank_ = Mdspan::rank(); - -public: - // Inherit constructors from base class - mdtranspose(Mdspan &mdspan) : Mdspan(mdspan) {} - - // rearrange indices according to template arguments - template - auto &operator()(Indexes... indexes) const { - std::tuple index(indexes...); - return Mdspan::operator()(std::get(index)...); - } - auto &operator()(std::array index) const { - return Mdspan::operator()(index[Is]...); - } - - auto extents() const { - // To get the extents, we must invert the index mapping - std::array from_transposed({Is...}); - std::array extents_t; - for (std::size_t i = 0; i < rank_; i++) { - extents_t[from_transposed[i]] = Mdspan::extent(i); - } - - return md_extents(extents_t); - } - auto extent(std::size_t d) const { return extents().extent(d); } -}; - -} // namespace dr::__detail - -template -struct fmt::formatter : public formatter { - template - auto format(Mdspan mdspan, FmtContext &ctx) const { - std::array index; - rng::fill(index, 0); - format_mdspan(ctx, mdspan, index, 0); - return ctx.out(); - } - - void format_mdspan(auto &ctx, auto mdspan, auto &index, - std::size_t dim) const { - for (std::size_t i = 0; i < mdspan.extent(dim); i++) { - index[dim] = i; - if (dim == mdspan.rank() - 1) { - if (i == 0) { - format_to(ctx.out(), "{}: ", index); - } - format_to(ctx.out(), "{:4} ", mdspan(index)); - } else { - format_mdspan(ctx, mdspan, index, dim + 1); - } - } - format_to(ctx.out(), "\n"); - } -}; - -namespace MDSPAN_NAMESPACE { - -template -bool operator==(const M1 &m1, const M2 &m2) { - constexpr std::size_t rank1 = M1::rank(), rank2 = M2::rank(); - static_assert(rank1 == rank2); - if (dr::__detail::dims(m1.extents()) != - dr::__detail::dims(m2.extents())) { - return false; - } - - // See mdspan_foreach for a way to generalize this to all ranks - if constexpr (M1::rank() == 1) { - for (std::size_t i = 0; i < m1.extent(0); i++) { - if (m1(i) != m2(i)) { - return false; - } - } - } else if constexpr (M1::rank() == 2) { - for (std::size_t i = 0; i < m1.extent(0); i++) { - for (std::size_t j = 0; j < m1.extent(1); j++) { - if (m1(i, j) != m2(i, j)) { - return false; - } - } - } - } else if constexpr (M1::rank() == 3) { - for (std::size_t i = 0; i < m1.extent(0); i++) { - for (std::size_t j = 0; j < m1.extent(1); j++) { - for (std::size_t k = 0; k < m1.extent(2); k++) { - if (m1(i, j, k) != m2(i, j, k)) { - return false; - } - } - } - } - } else { - assert(false); - } - - return true; -} - -template -inline std::ostream &operator<<(std::ostream &os, const M &m) { - if constexpr (dr::__detail::mdarray_like) { - os << fmt::format("\n{}", m.to_mdspan()); - } else { - os << fmt::format("\n{}", m); - } - return os; -} - -} // namespace MDSPAN_NAMESPACE - -namespace dr { - -template -concept distributed_mdspan_range = - distributed_range && requires(R &r) { r.mdspan(); }; - -} // namespace dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/memory.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/memory.hpp deleted file mode 100644 index 610d0eb4e9b..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/memory.hpp +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -namespace dr { - -template struct default_memory { - using value_type = T; - std::allocator std_allocator; - - T *allocate(std::size_t size) { - auto p = std_allocator.allocate(size); - assert(p != nullptr); - memset(p, 0, sizeof(T) * size); - return p; - } - - template F *allocate(std::size_t size) { - std::allocator allocator; - auto p = allocator.allocate(size); - assert(p != nullptr); - memset(p, 0, sizeof(F) * size); - return p; - } - - constexpr void deallocate(T *p, std::size_t n) { - std_allocator.deallocate(p, n); - } - - template void deallocate(F *p, std::size_t n) { - std::allocator allocator; - allocator.deallocate(p, n); - p = nullptr; - } - - void memcpy(void *dst, const void *src, std::size_t numBytes) { - std::memcpy(dst, src, numBytes); - } - - template void offload(F lambda) { lambda(); } -}; - -#ifdef SYCL_LANGUAGE_VERSION -template struct sycl_memory { - using value_type = T; - using device_type = sycl::device; - - sycl::device device_; - sycl::context context_; - sycl::usm::alloc kind_; - std::size_t alignment_; - sycl::queue offload_queue_; - - sycl_memory(sycl::queue queue, - sycl::usm::alloc kind = sycl::usm::alloc::shared, - std::size_t alignment = 1) - : device_(queue.get_device()), context_(queue.get_context()), kind_(kind), - alignment_(alignment), offload_queue_(queue) {} - - T *allocate(std::size_t n) { - auto p = sycl::aligned_alloc(alignment_, n, device_, context_, kind_); - assert(p != nullptr); - return p; - } - - template F *allocate(std::size_t n) { - auto p = sycl::aligned_alloc(alignment_, n, device_, context_, kind_); - assert(p != nullptr); - return p; - } - - void deallocate(T *p, std::size_t n) { - assert(p != nullptr); - sycl::free(p, context_); - p = nullptr; - } - - template void deallocate(F *p, std::size_t n) { - assert(p != nullptr); - sycl::free(p, context_); - p = nullptr; - } - - void memcpy(void *dst, const void *src, std::size_t numBytes) { - assert(dst != nullptr); - assert(src != nullptr); - offload_queue_.memcpy(dst, src, numBytes).wait(); - } - - template void offload(F lambda) { - if (kind_ == sycl::usm::alloc::device) { - offload_queue_.single_task(lambda).wait(); - } else { - lambda(); - } - } -}; -#endif - -} // namespace dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/normal_distributed_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/normal_distributed_iterator.hpp deleted file mode 100644 index 83dc1440ada..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/normal_distributed_iterator.hpp +++ /dev/null @@ -1,123 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause -#pragma once - -#include - -namespace dr { - -template -/* -requires(dr::remote_range> && - rng::random_access_range>) - */ -class normal_distributed_iterator_accessor { -public: - using value_type = rng::range_value_t>; - - using segment_type = rng::range_value_t; - - using size_type = rng::range_size_t; - using difference_type = rng::range_difference_t; - - using reference = rng::range_reference_t; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = normal_distributed_iterator_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - constexpr normal_distributed_iterator_accessor() noexcept = default; - constexpr ~normal_distributed_iterator_accessor() noexcept = default; - constexpr normal_distributed_iterator_accessor( - const normal_distributed_iterator_accessor &) noexcept = default; - constexpr normal_distributed_iterator_accessor & - operator=(const normal_distributed_iterator_accessor &) noexcept = default; - - constexpr normal_distributed_iterator_accessor(V segments, - size_type segment_id, - size_type idx) noexcept - : segments_(segments), segment_id_(segment_id), idx_(idx) {} - - constexpr normal_distributed_iterator_accessor & - operator+=(difference_type offset) noexcept { - - while (offset > 0) { - difference_type current_offset = - std::min(offset, difference_type(segments_[segment_id_].size()) - - difference_type(idx_)); - idx_ += current_offset; - offset -= current_offset; - - if (idx_ >= segments_[segment_id_].size()) { - segment_id_++; - idx_ = 0; - } - } - - while (offset < 0) { - difference_type current_offset = - std::min(-offset, difference_type(idx_) + 1); - - difference_type new_idx = difference_type(idx_) - current_offset; - offset += current_offset; - - if (new_idx < 0) { - segment_id_--; - new_idx = segments_[segment_id_].size() - 1; - } - - idx_ = new_idx; - } - - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return segment_id_ == other.segment_id_ && idx_ == other.idx_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(get_global_idx()) - other.get_global_idx(); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - if (segment_id_ < other.segment_id_) { - return true; - } else if (segment_id_ == other.segment_id_) { - return idx_ < other.idx_; - } else { - return false; - } - } - - constexpr reference operator*() const noexcept { - return segments_[segment_id_][idx_]; - } - - auto segments() const noexcept { - return dr::__detail::drop_segments(segments_, segment_id_, idx_); - } - -private: - size_type get_global_idx() const noexcept { - size_type cumulative_size = 0; - for (std::size_t i = 0; i < segment_id_; i++) { - cumulative_size += segments_[i].size(); - } - return cumulative_size + idx_; - } - - rng::views::all_t segments_; - size_type segment_id_ = 0; - size_type idx_ = 0; -}; - -template -using normal_distributed_iterator = - dr::iterator_adaptor>; - -} // namespace dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp index 19c00b5dfd6..c9b2527c52d 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp @@ -6,7 +6,7 @@ #include -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp index 223433aa936..65c842c482d 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp index 0cc96bf9685..00c16fea735 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges_utils.hpp deleted file mode 100644 index 1d4f9351e0a..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges_utils.hpp +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -namespace dr::__detail { - -// -// std::ranges::enumerate handles unbounded ranges and returns a range -// where end() is a different type than begin(). Most of our code -// assumes std::ranges::common_range. bounded_enumerate requires a -// bounded range and returns a common_range. -// -template auto bounded_enumerate(R &&r) { - auto size = rng::distance(r); - using W = std::uint32_t; - return rng::views::zip(rng::views::iota(W(0), W(size)), r); -} - -} // namespace dr::__detail diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp index 964ffe111e2..8cf3efe0f54 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp @@ -6,8 +6,8 @@ #include -#include -#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp index 05bfb989786..e0654394da2 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp @@ -4,11 +4,11 @@ #pragma once -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp index c7cdcfbd9c0..618a3d644f6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp @@ -6,7 +6,7 @@ #include -#include +#include #ifdef SYCL_LANGUAGE_VERSION diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/tuple_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/tuple_utils.hpp deleted file mode 100644 index 194873fab05..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/tuple_utils.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -namespace dr::__detail { - -auto tuple_transform(auto tuple, auto op) { - auto transform = [op](auto &&...items) { - return std::make_tuple(op(items)...); - }; - return std::apply(transform, tuple); -} - -auto tie_transform(auto tuple, auto op) { - auto transform = [op](Items &&...items) { - return std::tie(op(std::forward(items))...); - }; - return std::apply(transform, tuple); -} - -auto tuple_foreach(auto tuple, auto op) { - auto transform = [op](auto... items) { (op(items), ...); }; - std::apply(transform, tuple); -} - -} // namespace dr::__detail diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp index b4750fa2f5a..ca8255a6cdc 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp @@ -9,11 +9,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp index 5e724499d00..0458bd56c19 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp @@ -11,14 +11,14 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp index a9527c1f1e8..82823cadedd 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp @@ -9,11 +9,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp index 77802a521ff..582afeb1362 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp @@ -6,12 +6,12 @@ #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp index a6cdc828cb1..8a439d6e688 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp @@ -13,14 +13,14 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp index 23641501583..fa677d4e692 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp @@ -6,10 +6,10 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp index 9ff6f650284..c1cc896deaa 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp index 5f1ceb6c9e9..7c253735dcb 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp @@ -4,15 +4,15 @@ #pragma once -#include -#include - -#include -#include -#include -#include -#include -#include +#include +#include + +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp index 1d049667a5c..14774d7cbce 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #ifdef USE_MKL #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp index 925b44f9b6d..cb8569e088d 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include -#include +#include +#include +#include #ifdef USE_MKL #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp index 040b2568522..2d12c8f8c51 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp @@ -4,5 +4,5 @@ #pragma once -#include -#include +#include +#include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp index 04df2fc677b..c6257658f4a 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp @@ -9,10 +9,10 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include #include namespace { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp index 7fb0b516e50..9df8eb7e6b0 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp @@ -9,9 +9,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp index 58aecae19b5..e1adfa97e4b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp @@ -3,9 +3,9 @@ // SPDX-License-Identifier: BSD-3-Clause #pragma once -#include -#include -#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp index eeff3b323a3..d9cf06b40db 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp @@ -8,7 +8,7 @@ #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp index 41427430b55..bceb73785c5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp @@ -6,14 +6,14 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp index 0e4aa24d481..11f76d66fdd 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp index 0a460a1953f..c61a933753b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp index f4cc45308fa..ffa6669601c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include -#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp index be164517eb4..1ed0ed80b0c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp @@ -6,12 +6,12 @@ #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp index 8f7a93e44d9..f9b200f00e7 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp @@ -4,15 +4,15 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp index 523be315078..e8af161cc31 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp @@ -4,10 +4,10 @@ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp index 21e72160be2..4d4244d388c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp @@ -8,7 +8,7 @@ #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp index bf2042fd4b0..2f4a5a27dcb 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #include #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp index 5f3a2282314..f9363147ea3 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp index a5b5144b5a1..b23d08474f0 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp index b1423082253..988e3088a30 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp @@ -6,11 +6,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp index 823862c21ba..5b7fe04c1ec 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp @@ -8,11 +8,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp index 1433e40b9c8..5be6c314bfd 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp @@ -7,7 +7,7 @@ #include #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp index e6f99238b08..3ec33f913ff 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp @@ -11,8 +11,8 @@ #include #include -#include -#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp index 7c6d7e29ac3..014592a292f 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp index 4b9a4b7fd22..0aa845b03bf 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp index 678c9f0a8d5..166c2efe53d 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp @@ -6,7 +6,7 @@ #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp index 2e801ce8d85..6c29c4c8315 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #include #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp index f72e1fc3cb4..4b80acc5a0a 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp @@ -5,7 +5,7 @@ #pragma once #include -#include +#include #include #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp index 77ee2359ae1..564e37d9748 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp @@ -12,8 +12,8 @@ #include #include -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp index 0be6941398c..dad513f554a 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp index b67c5635cc8..9fa28306246 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp index fb24b6e6963..cfbc42fdc00 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp @@ -6,11 +6,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp index 72a308a2679..7754c680e5f 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp @@ -6,13 +6,13 @@ #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp index 5501e249cf8..9145e55fc56 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include -#include +#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp index b2518a8ea6c..a922d7fb16e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp index 1b4ba1aafe7..b607f484d8c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp @@ -4,12 +4,12 @@ #pragma once -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp index 0a66aa5f010..fcf269edef5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp @@ -4,10 +4,10 @@ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include namespace dr::shp::views { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp index c24b0b2f632..ce1e5133fcb 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp @@ -6,11 +6,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp index af59ab70eaa..ab35e2ff032 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp @@ -8,8 +8,8 @@ #include #include -#include -#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp index 556beaba39e..9402f7a98f5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr { diff --git a/test/distributed-ranges/shp/CMakeLists.txt b/test/distributed-ranges/shp/CMakeLists.txt index b635c99d31a..e41a946462e 100644 --- a/test/distributed-ranges/shp/CMakeLists.txt +++ b/test/distributed-ranges/shp/CMakeLists.txt @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: Intel Corporation # # SPDX-License-Identifier: BSD-3-Clause - include(FetchContent) FetchContent_Declare( @@ -40,6 +39,11 @@ target_compile_definitions(dr_shp INTERFACE USE_MKL _GLIBCXX_USE_TBB_PAR_BACKEND=0) target_link_libraries(dr_shp INTERFACE range-v3 fmt::fmt MKL::MKL_DPCPP) +if (DEFINED ONEDPL_USE_DR) + target_compile_options(dr_shp INTERFACE "-DONEDPL_USE_DISTRIBUTED_RANGES") +endif() + + # For use, see: # https://github.com/illuhad/hipSYCL/blob/develop/doc/using-hipsycl.md#using-the-cmake-integration # example: cmake .. -DhipSYCL_DIR= @@ -53,6 +57,9 @@ endif() set(CMAKE_INCLUDE_CURRENT_DIR ON) + + + add_executable( shp-tests shp-tests.cpp ../common/all.cpp ../common/copy.cpp ../common/counted.cpp @@ -84,4 +91,4 @@ endfunction() add_shp_ctest(shp-tests shp-tests) add_shp_ctest(shp-tests-3 shp-tests --devicesCount 3) -add_shp_ctest(shp-tests-3-only shp-tests-3 --devicesCount 3) +add_shp_ctest(shp-tests-3-only shp-tests-3 --devicesCount 3) \ No newline at end of file diff --git a/test/distributed-ranges/shp/detail.cpp b/test/distributed-ranges/shp/detail.cpp index 8ff95b4465a..145fad179cb 100644 --- a/test/distributed-ranges/shp/detail.cpp +++ b/test/distributed-ranges/shp/detail.cpp @@ -3,7 +3,7 @@ // SPDX-License-Identifier: BSD-3-Clause #include "xhp-tests.hpp" -#include +#include namespace shp = dr::shp; diff --git a/test/distributed-ranges/shp/xhp-tests.hpp b/test/distributed-ranges/shp/xhp-tests.hpp index 2ab612d9635..96fa25e52df 100644 --- a/test/distributed-ranges/shp/xhp-tests.hpp +++ b/test/distributed-ranges/shp/xhp-tests.hpp @@ -4,11 +4,11 @@ #pragma once #include "cxxopts.hpp" -#include -#include #include #include #include +#include +#include #define TEST_SHP @@ -31,6 +31,6 @@ concept compliant_view = rng::forward_range && requires(V &v) { dr::ranges::rank(dr::ranges::segments(v)[0]); }; -#include "common-tests.hpp" +#include "../include/common-tests.hpp" -using AllTypes = ::testing::Types>; +using AllTypes = ::testing::Types>; \ No newline at end of file From b66e052bec5e03d94f628f870275c5d58e76f745 Mon Sep 17 00:00:00 2001 From: Julian Miller Date: Fri, 15 Mar 2024 12:21:49 +0100 Subject: [PATCH 03/29] Extend compiler support for the unified USM and buffer storage (#1410) * Extend compiler support for USM buffer storage * Workaround access::decorated * Address review comment --- .../hetero/dpcpp/parallel_backend_sycl_reduce.h | 12 +++++++++--- .../hetero/dpcpp/parallel_backend_sycl_utils.h | 17 ++++++++++++++++- .../oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 6 +++--- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 52b036ab4b3..c856083041e 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -126,7 +126,9 @@ struct __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __ite __cgh.parallel_for<_Name...>( sycl::nd_range<1>(sycl::range<1>(__work_group_size), sycl::range<1>(__work_group_size)), [=](sycl::nd_item<1> __item_id) { - auto __res_ptr = __res_acc.__get_pointer(); + auto __res_ptr = + __usm_host_or_buffer_storage<_ExecutionPolicy, _Tp>::__get_usm_host_or_buffer_accessor_ptr( + __res_acc); __work_group_reduce_kernel<_Tp>(__item_id, __n, __transform_pattern, __reduce_pattern, __init, __temp_local, __res_ptr, __rngs...); }); @@ -243,7 +245,9 @@ struct __parallel_transform_reduce_work_group_kernel_submitter< __cgh.parallel_for<_KernelName...>( sycl::nd_range<1>(sycl::range<1>(__work_group_size2), sycl::range<1>(__work_group_size2)), [=](sycl::nd_item<1> __item_id) { - auto __res_ptr = __res_acc.__get_pointer(); + auto __res_ptr = + __usm_host_or_buffer_storage<_ExecutionPolicy, _Tp>::__get_usm_host_or_buffer_accessor_ptr( + __res_acc); __work_group_reduce_kernel<_Tp>(__item_id, __n, __transform_pattern, __reduce_pattern, __init, __temp_local, __res_ptr, __temp_acc); }); @@ -355,7 +359,9 @@ struct __parallel_transform_reduce_impl sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size), sycl::range<1>(__work_group_size)), [=](sycl::nd_item<1> __item_id) { - auto __res_ptr = __res_acc.__get_pointer(); + auto __res_ptr = + __usm_host_or_buffer_storage<_ExecutionPolicy, _Tp>::__get_usm_host_or_buffer_accessor_ptr( + __res_acc); auto __local_idx = __item_id.get_local_id(0); auto __group_idx = __item_id.get_group(0); // 1. Initialization (transform part). Fill local memory diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h index 62025411b24..c0c0ee40946 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h @@ -510,7 +510,7 @@ struct __usm_host_or_buffer_storage __use_USM_host_allocations(sycl::queue __queue) { // A buffer is used by default. Supporting compilers use the unified future on top of USM host memory or a buffer. -#if _ONEDPL_SYCL_USM_HOST_PRESENT +#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT auto __device = __queue.get_device(); if (!__device.is_gpu()) return false; @@ -540,11 +540,26 @@ struct __usm_host_or_buffer_storage } } + template + static auto + __get_usm_host_or_buffer_accessor_ptr(const _Acc& __acc) + { +#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT + return __acc.__get_pointer(); +#else + return &__acc[0]; +#endif + } + auto __get_acc(sycl::handler& __cgh) { +#if _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT return __usm ? __usm_host_or_buffer_accessor<_T>(__cgh, __usm_buf.get()) : __usm_host_or_buffer_accessor<_T>(__cgh, __sycl_buf.get()); +#else + return sycl::accessor(*__sycl_buf.get(), __cgh, sycl::read_write, __dpl_sycl::__no_init{}); +#endif } _T diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index d864337087c..a4c470502fe 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -82,12 +82,12 @@ # define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE_IF_SUPPORTED(SIZE) #endif -// The unified future supporting USM host memory and buffers is only supported after DPCPP 2023.1 +// The unified future supporting USM memory and buffers is only supported after DPCPP 2023.1 // but not by 2023.2. #if (_ONEDPL_LIBSYCL_VERSION >= 60100 && _ONEDPL_LIBSYCL_VERSION != 60200) -# define _ONEDPL_SYCL_USM_HOST_PRESENT 1 +# define _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT 1 #else -# define _ONEDPL_SYCL_USM_HOST_PRESENT 0 +# define _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT 0 #endif namespace __dpl_sycl From 2f4897fc820d733eaa803698430d111a4bf1e38b Mon Sep 17 00:00:00 2001 From: "Mateusz P. Nowak" Date: Mon, 18 Mar 2024 12:38:36 +0000 Subject: [PATCH 04/29] cleanups of include paths --- CMakeLists.txt | 2 +- .../concepts/concepts.hpp | 2 +- .../detail/enumerate.hpp | 2 +- .../detail/iterator_adaptor.hpp | 2 +- .../detail/onedpl_direct_iterator.hpp | 2 +- .../detail/owning_view.hpp | 2 +- .../distributed_ranges_impl/detail/ranges.hpp | 2 +- .../detail/remote_subrange.hpp | 4 ++-- .../detail/segments_tools.hpp | 10 +++++----- .../detail/sycl_utils.hpp | 2 +- .../shp/algorithms/copy.hpp | 10 +++++----- .../shp/algorithms/exclusive_scan.hpp | 16 ++++++++-------- .../shp/algorithms/fill.hpp | 10 +++++----- .../shp/algorithms/for_each.hpp | 12 ++++++------ .../shp/algorithms/inclusive_scan.hpp | 16 ++++++++-------- .../shp/algorithms/iota.hpp | 8 ++++---- .../shp/algorithms/matrix/gemm.hpp | 4 ++-- .../shp/algorithms/matrix/gemv.hpp | 18 +++++++++--------- .../shp/algorithms/matrix/local_gemm.hpp | 2 +- .../shp/algorithms/matrix/local_gemv.hpp | 6 +++--- .../algorithms/matrix/matrix_algorithms.hpp | 4 ++-- .../shp/algorithms/reduce.hpp | 8 ++++---- .../shp/algorithms/sort.hpp | 6 +++--- .../shp/algorithms/transform.hpp | 6 +++--- .../distributed_ranges_impl/shp/allocators.hpp | 2 +- .../containers/distributed_dense_matrix.hpp | 16 ++++++++-------- .../shp/containers/duplicated_vector.hpp | 4 ++-- .../shp/containers/matrix_entry.hpp | 2 +- .../shp/containers/matrix_partition.hpp | 6 +++--- .../shp/containers/sequential/dense_matrix.hpp | 12 ++++++------ .../shp/containers/sparse_matrix.hpp | 18 +++++++++--------- .../distributed_ranges_impl/shp/detail.hpp | 8 ++++---- .../distributed_ranges_impl/shp/device_ptr.hpp | 2 +- .../distributed_ranges_impl/shp/device_ref.hpp | 2 +- .../shp/device_span.hpp | 4 ++-- .../shp/device_vector.hpp | 4 ++-- .../shp/distributed_span.hpp | 10 +++++----- .../shp/distributed_vector.hpp | 10 +++++----- .../distributed_ranges_impl/shp/future.hpp | 2 +- .../distributed_ranges_impl/shp/init.hpp | 4 ++-- .../distributed_ranges_impl/shp/range.hpp | 4 ++-- .../shp/range_adaptors.hpp | 4 ++-- .../distributed_ranges_impl/shp/span.hpp | 2 +- .../shp/util/coo_matrix.hpp | 2 +- .../shp/util/generate_random.hpp | 2 +- .../shp/util/matrix_io.hpp | 4 ++-- .../shp/views/csr_matrix_view.hpp | 4 ++-- .../shp/views/dense_column_view.hpp | 4 ++-- .../shp/views/dense_matrix_iterator.hpp | 10 +++++----- .../shp/views/dense_matrix_view.hpp | 14 +++++++------- .../shp/views/dense_row_view.hpp | 6 +++--- .../shp/views/enumerate.hpp | 2 +- .../shp/views/standard_views.hpp | 12 ++++++------ .../shp/views/views.hpp | 8 ++++---- .../distributed_ranges_impl/shp/zip_view.hpp | 10 +++++----- .../views/transform.hpp | 4 ++-- .../distributed_ranges_impl/views/views.hpp | 4 ++-- test/distributed-ranges/shp/CMakeLists.txt | 2 +- 58 files changed, 180 insertions(+), 180 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ed8edfaf65..a2ddcd42a17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -317,7 +317,7 @@ if (ONEDPL_BACKEND MATCHES "^(tbb|dpcpp|dpcpp_only)$") set(ONEDPL_USE_DR TRUE) message(STATUS "Adding Distributed Ranges to the project") else() - message(STATUS "Distributed Ranges not supported by the compiler") + message(STATUS "C++20 required to use Distributed Ranges in oneDPL") endif() endif() diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp index c0b54ead437..0894f710336 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp index f452d67a9e6..a7ac841052c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp index 6747ef9a548..bc6a63b8db9 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp @@ -7,7 +7,7 @@ #include #include -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp index c9b2527c52d..86c572ef200 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp @@ -6,7 +6,7 @@ #include -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp index 65c842c482d..d6027c6823b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp index 00c16fea735..11fa8d5c524 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp index 8cf3efe0f54..50147347693 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp @@ -6,8 +6,8 @@ #include -#include -#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp index e0654394da2..59fce0dd761 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp @@ -4,11 +4,11 @@ #pragma once -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp index 618a3d644f6..143f3e8612e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp @@ -6,7 +6,7 @@ #include -#include +#include #ifdef SYCL_LANGUAGE_VERSION diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp index ca8255a6cdc..ebdc6425da3 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp @@ -9,11 +9,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp index 0458bd56c19..0ae647b29c8 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp @@ -11,14 +11,14 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp index 82823cadedd..26226c63f21 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp @@ -9,11 +9,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp index 582afeb1362..0266bdea344 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp @@ -6,12 +6,12 @@ #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp index 8a439d6e688..15fbd9467dc 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp @@ -13,14 +13,14 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp index fa677d4e692..bfc250abfe6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp @@ -6,10 +6,10 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp index c1cc896deaa..e860a5ed5a9 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp index 7c253735dcb..a4c0842f744 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp @@ -4,15 +4,15 @@ #pragma once -#include -#include - -#include -#include -#include -#include -#include -#include +#include +#include + +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp index 14774d7cbce..b7cd17dcc11 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #ifdef USE_MKL #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp index cb8569e088d..142792ecfde 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include -#include +#include +#include +#include #ifdef USE_MKL #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp index 2d12c8f8c51..36182acf517 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp @@ -4,5 +4,5 @@ #pragma once -#include -#include +#include +#include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp index c6257658f4a..7bfd00eb178 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp @@ -9,10 +9,10 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include #include namespace { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp index 9df8eb7e6b0..b0a595cc62c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp @@ -9,9 +9,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp index e1adfa97e4b..50eebd0698e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp @@ -3,9 +3,9 @@ // SPDX-License-Identifier: BSD-3-Clause #pragma once -#include -#include -#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp index d9cf06b40db..10beee77ca9 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp @@ -8,7 +8,7 @@ #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp index bceb73785c5..c70f5aff017 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp @@ -6,14 +6,14 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp index 11f76d66fdd..904458e5777 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp index c61a933753b..df29dda07df 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp index ffa6669601c..5574450ffc5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include -#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp index 1ed0ed80b0c..92b25a5e34e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp @@ -6,12 +6,12 @@ #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp index f9b200f00e7..909009139e4 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp @@ -4,16 +4,16 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp index e8af161cc31..b4d76f98358 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp @@ -4,11 +4,11 @@ #pragma once -#include -#include -#include -#include #include +#include +#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp index 4d4244d388c..71dd37a5ae6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp @@ -8,7 +8,7 @@ #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp index 2f4a5a27dcb..8b51ca2122e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #include #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp index f9363147ea3..2a779f900c2 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp index b23d08474f0..0d6c97a5dcf 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp index 988e3088a30..265053504b7 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp @@ -6,11 +6,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp index 5b7fe04c1ec..a459a8df072 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp @@ -8,11 +8,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp index 5be6c314bfd..185dcff1016 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp @@ -7,7 +7,7 @@ #include #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp index 3ec33f913ff..b31eb2b3fa5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp @@ -11,9 +11,9 @@ #include #include -#include -#include #include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp index 014592a292f..36fb81dd3c5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp index 0aa845b03bf..4373e51bb5b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp index 166c2efe53d..e297405d30c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp @@ -6,7 +6,7 @@ #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp index 6c29c4c8315..f95649a0135 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp index 4b80acc5a0a..f5ebc9ae9b5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp @@ -5,8 +5,8 @@ #pragma once #include -#include #include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp index 564e37d9748..ff6bf29c357 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp @@ -12,8 +12,8 @@ #include #include -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp index dad513f554a..fecf63954ef 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include #include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp index 9fa28306246..71286bd4734 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include #include +#include +#include namespace dr::shp { template class dense_matrix_column_accessor { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp index cfbc42fdc00..fb2ff89b914 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp @@ -6,11 +6,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp index 7754c680e5f..dfe28d46f68 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp @@ -6,13 +6,13 @@ #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp index 9145e55fc56..18d8e1d82b0 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp @@ -4,10 +4,10 @@ #pragma once -#include -#include -#include #include +#include +#include +#include namespace dr::shp { template class dense_matrix_row_accessor { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp index a922d7fb16e..c3455e9585a 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp index b607f484d8c..aa5887c50cd 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp @@ -4,12 +4,12 @@ #pragma once -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp index fcf269edef5..3f7e4449266 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp @@ -4,10 +4,10 @@ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include namespace dr::shp::views { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp index ce1e5133fcb..81971f834bc 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp @@ -6,11 +6,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp index ab35e2ff032..cbf35f084e4 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp @@ -8,8 +8,8 @@ #include #include -#include -#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp index 9402f7a98f5..72cf8ea162f 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr { diff --git a/test/distributed-ranges/shp/CMakeLists.txt b/test/distributed-ranges/shp/CMakeLists.txt index e41a946462e..17fa366ef16 100644 --- a/test/distributed-ranges/shp/CMakeLists.txt +++ b/test/distributed-ranges/shp/CMakeLists.txt @@ -91,4 +91,4 @@ endfunction() add_shp_ctest(shp-tests shp-tests) add_shp_ctest(shp-tests-3 shp-tests --devicesCount 3) -add_shp_ctest(shp-tests-3-only shp-tests-3 --devicesCount 3) \ No newline at end of file +add_shp_ctest(shp-tests-3-only shp-tests-3 --devicesCount 3) From 9189849542afd97c77249d16db8b42172a09e414 Mon Sep 17 00:00:00 2001 From: "Mateusz P. Nowak" Date: Mon, 18 Mar 2024 12:38:36 +0000 Subject: [PATCH 05/29] cleanups of include paths and error messages --- CMakeLists.txt | 2 +- include/oneapi/dpl/distributed-ranges | 2 +- .../concepts/concepts.hpp | 2 +- .../detail/enumerate.hpp | 2 +- .../detail/iterator_adaptor.hpp | 2 +- .../detail/onedpl_direct_iterator.hpp | 2 +- .../detail/owning_view.hpp | 2 +- .../distributed_ranges_impl/detail/ranges.hpp | 2 +- .../detail/remote_subrange.hpp | 4 ++-- .../detail/segments_tools.hpp | 10 +++++----- .../detail/sycl_utils.hpp | 2 +- .../shp/algorithms/copy.hpp | 10 +++++----- .../shp/algorithms/exclusive_scan.hpp | 16 ++++++++-------- .../shp/algorithms/fill.hpp | 10 +++++----- .../shp/algorithms/for_each.hpp | 12 ++++++------ .../shp/algorithms/inclusive_scan.hpp | 16 ++++++++-------- .../shp/algorithms/iota.hpp | 8 ++++---- .../shp/algorithms/matrix/gemm.hpp | 4 ++-- .../shp/algorithms/matrix/gemv.hpp | 18 +++++++++--------- .../shp/algorithms/matrix/local_gemm.hpp | 2 +- .../shp/algorithms/matrix/local_gemv.hpp | 6 +++--- .../algorithms/matrix/matrix_algorithms.hpp | 4 ++-- .../shp/algorithms/reduce.hpp | 8 ++++---- .../shp/algorithms/sort.hpp | 6 +++--- .../shp/algorithms/transform.hpp | 6 +++--- .../distributed_ranges_impl/shp/allocators.hpp | 2 +- .../containers/distributed_dense_matrix.hpp | 16 ++++++++-------- .../shp/containers/duplicated_vector.hpp | 4 ++-- .../shp/containers/matrix_entry.hpp | 2 +- .../shp/containers/matrix_partition.hpp | 6 +++--- .../shp/containers/sequential/dense_matrix.hpp | 12 ++++++------ .../shp/containers/sparse_matrix.hpp | 18 +++++++++--------- .../distributed_ranges_impl/shp/detail.hpp | 8 ++++---- .../distributed_ranges_impl/shp/device_ptr.hpp | 2 +- .../distributed_ranges_impl/shp/device_ref.hpp | 2 +- .../shp/device_span.hpp | 4 ++-- .../shp/device_vector.hpp | 4 ++-- .../shp/distributed_span.hpp | 10 +++++----- .../shp/distributed_vector.hpp | 10 +++++----- .../distributed_ranges_impl/shp/future.hpp | 2 +- .../distributed_ranges_impl/shp/init.hpp | 4 ++-- .../distributed_ranges_impl/shp/range.hpp | 4 ++-- .../shp/range_adaptors.hpp | 4 ++-- .../distributed_ranges_impl/shp/span.hpp | 2 +- .../shp/util/coo_matrix.hpp | 2 +- .../shp/util/generate_random.hpp | 2 +- .../shp/util/matrix_io.hpp | 4 ++-- .../shp/views/csr_matrix_view.hpp | 4 ++-- .../shp/views/dense_column_view.hpp | 4 ++-- .../shp/views/dense_matrix_iterator.hpp | 10 +++++----- .../shp/views/dense_matrix_view.hpp | 14 +++++++------- .../shp/views/dense_row_view.hpp | 6 +++--- .../shp/views/enumerate.hpp | 2 +- .../shp/views/standard_views.hpp | 12 ++++++------ .../shp/views/views.hpp | 8 ++++---- .../distributed_ranges_impl/shp/zip_view.hpp | 10 +++++----- .../views/transform.hpp | 4 ++-- .../distributed_ranges_impl/views/views.hpp | 4 ++-- test/distributed-ranges/shp/CMakeLists.txt | 2 +- 59 files changed, 181 insertions(+), 181 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ed8edfaf65..a2ddcd42a17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -317,7 +317,7 @@ if (ONEDPL_BACKEND MATCHES "^(tbb|dpcpp|dpcpp_only)$") set(ONEDPL_USE_DR TRUE) message(STATUS "Adding Distributed Ranges to the project") else() - message(STATUS "Distributed Ranges not supported by the compiler") + message(STATUS "C++20 required to use Distributed Ranges in oneDPL") endif() endif() diff --git a/include/oneapi/dpl/distributed-ranges b/include/oneapi/dpl/distributed-ranges index f466b407737..4d79e5598d5 100644 --- a/include/oneapi/dpl/distributed-ranges +++ b/include/oneapi/dpl/distributed-ranges @@ -17,7 +17,7 @@ #if defined(ONEDPL_USE_DISTRIBUTED_RANGES) #include "oneapi/dpl/internal/distributed_ranges_impl/shp.hpp" #else -#error "Compiler does not support Distributed Ranges" +#error "C++20 required to use Distributed Ranges" #endif #endif /* _ONEDPL_DISTRIBUTED_RANGES */ diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp index c0b54ead437..0894f710336 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp index f452d67a9e6..a7ac841052c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp index 6747ef9a548..bc6a63b8db9 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp @@ -7,7 +7,7 @@ #include #include -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp index c9b2527c52d..86c572ef200 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp @@ -6,7 +6,7 @@ #include -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp index 65c842c482d..d6027c6823b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp index 00c16fea735..11fa8d5c524 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp index 8cf3efe0f54..50147347693 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp @@ -6,8 +6,8 @@ #include -#include -#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp index e0654394da2..59fce0dd761 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp @@ -4,11 +4,11 @@ #pragma once -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp index 618a3d644f6..143f3e8612e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp @@ -6,7 +6,7 @@ #include -#include +#include #ifdef SYCL_LANGUAGE_VERSION diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp index ca8255a6cdc..ebdc6425da3 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp @@ -9,11 +9,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp index 0458bd56c19..0ae647b29c8 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp @@ -11,14 +11,14 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp index 82823cadedd..26226c63f21 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp @@ -9,11 +9,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp index 582afeb1362..0266bdea344 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp @@ -6,12 +6,12 @@ #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp index 8a439d6e688..15fbd9467dc 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp @@ -13,14 +13,14 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp index fa677d4e692..bfc250abfe6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp @@ -6,10 +6,10 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp index c1cc896deaa..e860a5ed5a9 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp index 7c253735dcb..a4c0842f744 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp @@ -4,15 +4,15 @@ #pragma once -#include -#include - -#include -#include -#include -#include -#include -#include +#include +#include + +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp index 14774d7cbce..b7cd17dcc11 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #ifdef USE_MKL #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp index cb8569e088d..142792ecfde 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include -#include +#include +#include +#include #ifdef USE_MKL #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp index 2d12c8f8c51..36182acf517 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp @@ -4,5 +4,5 @@ #pragma once -#include -#include +#include +#include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp index c6257658f4a..7bfd00eb178 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp @@ -9,10 +9,10 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include #include namespace { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp index 9df8eb7e6b0..b0a595cc62c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp @@ -9,9 +9,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp index e1adfa97e4b..50eebd0698e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp @@ -3,9 +3,9 @@ // SPDX-License-Identifier: BSD-3-Clause #pragma once -#include -#include -#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp index d9cf06b40db..10beee77ca9 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp @@ -8,7 +8,7 @@ #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp index bceb73785c5..c70f5aff017 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp @@ -6,14 +6,14 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp index 11f76d66fdd..904458e5777 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp index c61a933753b..df29dda07df 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp @@ -8,7 +8,7 @@ #include #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp index ffa6669601c..5574450ffc5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include -#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp index 1ed0ed80b0c..92b25a5e34e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp @@ -6,12 +6,12 @@ #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp index f9b200f00e7..909009139e4 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp @@ -4,16 +4,16 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp index e8af161cc31..b4d76f98358 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp @@ -4,11 +4,11 @@ #pragma once -#include -#include -#include -#include #include +#include +#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp index 4d4244d388c..71dd37a5ae6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp @@ -8,7 +8,7 @@ #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp index 2f4a5a27dcb..8b51ca2122e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include #include #include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp index f9363147ea3..2a779f900c2 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp index b23d08474f0..0d6c97a5dcf 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp index 988e3088a30..265053504b7 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp @@ -6,11 +6,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp index 5b7fe04c1ec..a459a8df072 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp @@ -8,11 +8,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp index 5be6c314bfd..185dcff1016 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp @@ -7,7 +7,7 @@ #include #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp index 3ec33f913ff..b31eb2b3fa5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp @@ -11,9 +11,9 @@ #include #include -#include -#include #include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp index 014592a292f..36fb81dd3c5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp index 0aa845b03bf..4373e51bb5b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp index 166c2efe53d..e297405d30c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp @@ -6,7 +6,7 @@ #include -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp index 6c29c4c8315..f95649a0135 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp index 4b80acc5a0a..f5ebc9ae9b5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp @@ -5,8 +5,8 @@ #pragma once #include -#include #include +#include #include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp index 564e37d9748..ff6bf29c357 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp @@ -12,8 +12,8 @@ #include #include -#include -#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp index dad513f554a..fecf63954ef 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include #include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp index 9fa28306246..71286bd4734 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp @@ -4,9 +4,9 @@ #pragma once -#include -#include #include +#include +#include namespace dr::shp { template class dense_matrix_column_accessor { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp index cfbc42fdc00..fb2ff89b914 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp @@ -6,11 +6,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp index 7754c680e5f..dfe28d46f68 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp @@ -6,13 +6,13 @@ #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp index 9145e55fc56..18d8e1d82b0 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp @@ -4,10 +4,10 @@ #pragma once -#include -#include -#include #include +#include +#include +#include namespace dr::shp { template class dense_matrix_row_accessor { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp index a922d7fb16e..c3455e9585a 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp @@ -4,7 +4,7 @@ #pragma once -#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp index b607f484d8c..aa5887c50cd 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp @@ -4,12 +4,12 @@ #pragma once -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include namespace dr::shp { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp index fcf269edef5..3f7e4449266 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp @@ -4,10 +4,10 @@ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include namespace dr::shp::views { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp index ce1e5133fcb..81971f834bc 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp @@ -6,11 +6,11 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp index ab35e2ff032..cbf35f084e4 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp @@ -8,8 +8,8 @@ #include #include -#include -#include +#include +#include namespace dr { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp index 9402f7a98f5..72cf8ea162f 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp @@ -4,8 +4,8 @@ #pragma once -#include -#include +#include +#include namespace dr { diff --git a/test/distributed-ranges/shp/CMakeLists.txt b/test/distributed-ranges/shp/CMakeLists.txt index e41a946462e..17fa366ef16 100644 --- a/test/distributed-ranges/shp/CMakeLists.txt +++ b/test/distributed-ranges/shp/CMakeLists.txt @@ -91,4 +91,4 @@ endfunction() add_shp_ctest(shp-tests shp-tests) add_shp_ctest(shp-tests-3 shp-tests --devicesCount 3) -add_shp_ctest(shp-tests-3-only shp-tests-3 --devicesCount 3) \ No newline at end of file +add_shp_ctest(shp-tests-3-only shp-tests-3 --devicesCount 3) From 7eae79ef0257ea89358d5a22fb2b9c98c723981d Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Wed, 20 Mar 2024 14:28:38 +0100 Subject: [PATCH 06/29] [oneDPL] Implementation of tag dispatching on current codebase (#1239) --- .../internal/async_impl/async_impl_hetero.h | 118 +- .../dpl/internal/async_impl/glue_async_impl.h | 100 +- .../oneapi/dpl/internal/binary_search_impl.h | 94 +- .../internal/exclusive_scan_by_segment_impl.h | 62 +- .../internal/inclusive_scan_by_segment_impl.h | 56 +- .../dpl/internal/reduce_by_segment_impl.h | 62 +- .../dpl/internal/scan_by_segment_impl.h | 17 +- include/oneapi/dpl/pstl/algorithm_fwd.h | 1106 ++++---- include/oneapi/dpl/pstl/algorithm_impl.h | 2310 +++++++++-------- include/oneapi/dpl/pstl/execution_defs.h | 92 +- include/oneapi/dpl/pstl/execution_impl.h | 159 +- .../dpl/pstl/experimental/internal/for_loop.h | 12 + .../experimental/internal/for_loop_impl.h | 96 +- include/oneapi/dpl/pstl/glue_algorithm_impl.h | 567 ++-- .../dpl/pstl/glue_algorithm_ranges_impl.h | 128 +- include/oneapi/dpl/pstl/glue_memory_impl.h | 226 +- include/oneapi/dpl/pstl/glue_numeric_impl.h | 75 +- .../dpl/pstl/glue_numeric_ranges_impl.h | 28 +- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 937 +++---- .../hetero/algorithm_ranges_impl_hetero.h | 273 +- .../pstl/hetero/dpcpp/execution_sycl_defs.h | 89 +- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 185 +- .../hetero/dpcpp/parallel_backend_sycl_fpga.h | 196 +- .../dpcpp/parallel_backend_sycl_histogram.h | 52 +- .../dpcpp/parallel_backend_sycl_radix_sort.h | 3 +- .../dpcpp/parallel_backend_sycl_reduce.h | 83 +- .../dpcpp/parallel_backend_sycl_utils.h | 2 - .../dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h | 26 + .../dpl/pstl/hetero/histogram_impl_hetero.h | 17 +- .../dpl/pstl/hetero/numeric_impl_hetero.h | 106 +- .../pstl/hetero/numeric_ranges_impl_hetero.h | 70 +- include/oneapi/dpl/pstl/histogram_impl.h | 16 +- include/oneapi/dpl/pstl/iterator_defs.h | 60 +- include/oneapi/dpl/pstl/numeric_fwd.h | 131 +- include/oneapi/dpl/pstl/numeric_impl.h | 183 +- include/oneapi/dpl/pstl/omp/parallel_for.h | 2 +- .../oneapi/dpl/pstl/omp/parallel_for_each.h | 3 +- include/oneapi/dpl/pstl/omp/parallel_invoke.h | 2 +- include/oneapi/dpl/pstl/omp/parallel_merge.h | 7 +- include/oneapi/dpl/pstl/omp/parallel_reduce.h | 4 +- include/oneapi/dpl/pstl/omp/parallel_scan.h | 17 +- .../dpl/pstl/omp/parallel_stable_sort.h | 5 +- .../dpl/pstl/omp/parallel_transform_reduce.h | 5 +- .../dpl/pstl/omp/parallel_transform_scan.h | 4 +- include/oneapi/dpl/pstl/omp/util.h | 9 +- include/oneapi/dpl/pstl/parallel_backend.h | 6 +- .../oneapi/dpl/pstl/parallel_backend_serial.h | 42 +- .../oneapi/dpl/pstl/parallel_backend_tbb.h | 50 +- include/oneapi/dpl/pstl/parallel_impl.h | 19 +- .../experimental/for_loop.pass.cpp | 10 +- .../experimental/for_loop_induction.pass.cpp | 6 +- .../experimental/for_loop_reduction.pass.cpp | 16 +- 52 files changed, 3959 insertions(+), 3985 deletions(-) diff --git a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h index cd080b1e6a1..f053974f7b2 100644 --- a/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h +++ b/include/oneapi/dpl/internal/async_impl/async_impl_hetero.h @@ -30,10 +30,10 @@ namespace dpl namespace __internal { -template = 0> +template auto -__pattern_walk1_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) +__pattern_walk1_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Function __f) { auto __n = __last - __first; assert(__n > 0); @@ -43,19 +43,19 @@ __pattern_walk1_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forw auto __buf = __keep(__first, __last); auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf.all_view()); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf.all_view()); return __future_obj; } template = 0> + typename _BackendTag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2, + typename _Function> auto -__pattern_walk2_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f) +__pattern_walk2_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) { auto __n = __last1 - __first1; assert(__n > 0); @@ -67,8 +67,8 @@ __pattern_walk2_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo auto __buf2 = __keep2(__first2, __first2 + __n); auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view()); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view()); if constexpr (_IsSync::value) __future.wait(); @@ -76,11 +76,11 @@ __pattern_walk2_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo return __future.__make_future(__first2 + __n); } -template = 0> +template auto -__pattern_walk3_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) +__pattern_walk3_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) { auto __n = __last1 - __first1; assert(__n > 0); @@ -95,20 +95,22 @@ __pattern_walk3_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator3>(); auto __buf3 = __keep3(__first3, __first3 + __n); - auto __future = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view(), __buf3.all_view()); + auto __future = + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, + __buf1.all_view(), __buf2.all_view(), __buf3.all_view()); return __future.__make_future(__first3 + __n); } -template = 0> +template auto -__pattern_walk2_brick_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick) +__pattern_walk2_brick_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) { return __pattern_walk2_async( + __tag, __par_backend_hetero::make_wrapped_policy<__walk2_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __first1, __last1, __first2, __brick); } @@ -117,11 +119,10 @@ __pattern_walk2_brick_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first // transform_reduce (version with two binary functions) //------------------------------------------------------------------------ -template = 0> +template auto -__pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, +__pattern_transform_reduce_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { @@ -141,7 +142,7 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _RandomAccessIterato return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf1.all_view(), __buf2.all_view()); } @@ -150,12 +151,12 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _RandomAccessIterato // transform_reduce (with unary and binary functions) //------------------------------------------------------------------------ -template = 0> +template auto -__pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) +__pattern_transform_reduce_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) { assert(__first < __last); @@ -168,18 +169,18 @@ __pattern_transform_reduce_async(_ExecutionPolicy&& __exec, _ForwardIterator __f return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf.all_view()); } -template = 0> +template auto -__pattern_fill_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _T& __value) +__pattern_fill_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, const _T& __value) { return __pattern_walk1_async( - ::std::forward<_ExecutionPolicy>(__exec), + __tag, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), fill_functor<_T>{__value}); @@ -189,13 +190,12 @@ __pattern_fill_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa // transform_scan //------------------------------------------------------------------------ -template = 0> +template auto -__pattern_transform_scan_base_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, - _Iterator2 __result, _UnaryOperation __unary_op, _InitType __init, - _BinaryOperation __binary_op, _Inclusive) +__pattern_transform_scan_base_async(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, + _InitType __init, _BinaryOperation __binary_op, _Inclusive) { assert(__first < __last); @@ -206,39 +206,39 @@ __pattern_transform_scan_base_async(_ExecutionPolicy&& __exec, _Iterator1 __firs auto __buf2 = __keep2(__result, __result + __n); auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __unary_op, __init, - __binary_op, _Inclusive{}); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __unary_op, + __init, __binary_op, _Inclusive{}); return __res.__make_future(__result + __n); } -template = 0> +template auto -__pattern_transform_scan_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive) +__pattern_transform_scan_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _Type __init, + _BinaryOperation __binary_op, _Inclusive) { using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; - return __pattern_transform_scan_base_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); + return __pattern_transform_scan_base_async(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result, __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); } // scan without initial element -template = 0> +template auto -__pattern_transform_scan_async(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive) +__pattern_transform_scan_async(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, + _BinaryOperation __binary_op, _Inclusive) { using _ValueType = typename ::std::iterator_traits<_Iterator1>::value_type; using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_ValueType>; using _InitType = unseq_backend::__no_init_value<_RepackedType>; - return __pattern_transform_scan_base_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - __unary_op, _InitType{}, __binary_op, _Inclusive{}); + return __pattern_transform_scan_base_async(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result, __unary_op, _InitType{}, __binary_op, _Inclusive{}); } } // namespace __internal diff --git a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h index 26eca467131..dfd4a969ec8 100644 --- a/include/oneapi/dpl/internal/async_impl/glue_async_impl.h +++ b/include/oneapi/dpl/internal/async_impl/glue_async_impl.h @@ -43,9 +43,11 @@ auto transform_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryOperation __op, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + wait_for_all(::std::forward<_Events>(__dependencies)...); auto ret_val = oneapi::dpl::__internal::__pattern_walk2_async( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}); return ret_val; } @@ -59,9 +61,11 @@ transform_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI _ForwardIterator2 __first2, _ForwardIterator __result, _BinaryOperation __op, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + wait_for_all(::std::forward<_Events>(__dependencies)...); auto ret_val = oneapi::dpl::__internal::__pattern_walk3_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op))); return ret_val; } @@ -73,10 +77,12 @@ auto copy_async(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _Events&&... __dependencies) { + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + wait_for_all(::std::forward<_Events>(__dependencies)...); auto ret_val = oneapi::dpl::__internal::__pattern_walk2_brick_async( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__brick_copy{}); return ret_val; } @@ -93,8 +99,11 @@ sort_async(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Comp auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - return __par_backend_hetero::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), - __comp, oneapi::dpl::identity{}); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + using __backend_tag = typename decltype(__dispatch_tag)::__backend_tag; + + return __par_backend_hetero::__parallel_stable_sort(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf.all_view(), __comp, oneapi::dpl::identity{}); } template (__dependencies)...); - auto ret_val = - oneapi::dpl::__internal::__pattern_walk1_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __f); + auto ret_val = oneapi::dpl::__internal::__pattern_walk1_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __f); return ret_val; } @@ -130,10 +141,12 @@ auto reduce_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + wait_for_all(::std::forward<_Events>(__dependencies)...); - auto ret_val = oneapi::dpl::__internal::__pattern_transform_reduce_async(::std::forward<_ExecutionPolicy>(__exec), - __first, __last, __init, __binary_op, - oneapi::dpl::__internal::__no_op()); + auto ret_val = oneapi::dpl::__internal::__pattern_transform_reduce_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, + oneapi::dpl::__internal::__no_op()); return ret_val; } @@ -165,9 +178,11 @@ auto fill_async(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + wait_for_all(::std::forward<_Events>(__dependencies)...); - return oneapi::dpl::__internal::__pattern_fill_async(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __value); + return oneapi::dpl::__internal::__pattern_fill_async(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __value); } // [async.transform_reduce] @@ -180,9 +195,12 @@ auto transform_reduce_async(_ExecutionPolicy&& __exec, _ForwardIt1 __first1, _ForwardIt1 __last1, _ForwardIt2 __first2, _T __init, _BinaryOp1 __binary_op1, _BinaryOp2 __binary_op2, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_reduce_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, __binary_op1, __binary_op2); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, __binary_op1, + __binary_op2); } template (__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_reduce_async(::std::forward<_ExecutionPolicy>(__exec), __first, - __last, __init, __binary_op, __unary_op); + return oneapi::dpl::__internal::__pattern_transform_reduce_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, __unary_op); } template ::value_type; wait_for_all(::std::forward<_Events>(__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - ::std::plus<_ValueType>(), /*inclusive=*/::std::true_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), ::std::plus<_ValueType>(), /*inclusive=*/::std::true_type()); } template (__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __binary_op, /*inclusive=*/::std::true_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __binary_op, /*inclusive=*/::std::true_type()); } template (__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __init, __binary_op, /*inclusive=*/::std::true_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __init, __binary_op, /*inclusive=*/::std::true_type()); } template (__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __init, ::std::plus<_T>(), /*exclusive=*/::std::false_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __init, ::std::plus<_T>(), /*exclusive=*/::std::false_type()); } template (__dependencies)...); return oneapi::dpl::__internal::__pattern_transform_scan_async( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, oneapi::dpl::__internal::__no_op(), - __init, __binary_op, /*exclusive=*/::std::false_type()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + oneapi::dpl::__internal::__no_op(), __init, __binary_op, /*exclusive=*/::std::false_type()); } template (__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_scan_async(::std::forward<_ExecutionPolicy>(__exec), __first1, + return oneapi::dpl::__internal::__pattern_transform_scan_async(__dispatch_tag, + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __unary_op, __init, __binary_op, /*exclusive=*/::std::false_type()); } @@ -299,10 +332,12 @@ transform_inclusive_scan_async(_ExecutionPolicy&& __exec, _ForwardIt1 __first1, _ForwardIt2 __first2, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _Events&&... __dependencies) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + wait_for_all(::std::forward<_Events>(__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_scan_async(::std::forward<_ExecutionPolicy>(__exec), __first1, - __last1, __first2, __unary_op, __binary_op, - /*inclusive=*/::std::true_type()); + return oneapi::dpl::__internal::__pattern_transform_scan_async( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __unary_op, __binary_op, + /*inclusive=*/::std::true_type()); } template (__dependencies)...); - return oneapi::dpl::__internal::__pattern_transform_scan_async(::std::forward<_ExecutionPolicy>(__exec), __first1, + return oneapi::dpl::__internal::__pattern_transform_scan_async(__dispatch_tag, + ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __unary_op, __init, __binary_op, /*inclusive=*/::std::true_type()); } diff --git a/include/oneapi/dpl/internal/binary_search_impl.h b/include/oneapi/dpl/internal/binary_search_impl.h index 0c689fe6b8e..59c2f74ea82 100644 --- a/include/oneapi/dpl/internal/binary_search_impl.h +++ b/include/oneapi/dpl/internal/binary_search_impl.h @@ -68,36 +68,42 @@ struct custom_brick } }; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -lower_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, +OutputIterator +lower_bound_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { return ::std::lower_bound(start, end, val, comp) - start; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, +OutputIterator +upper_bound_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { return ::std::upper_bound(start, end, val, comp) - start; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, +OutputIterator +binary_search_impl(_Tag, Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + return oneapi::dpl::transform(policy, value_start, value_end, result, [=](typename ::std::iterator_traits::reference val) { return ::std::binary_search(start, end, val, comp); @@ -105,11 +111,11 @@ binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, In } #if _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -lower_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, - InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) +template +OutputIterator +lower_bound_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, + InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -128,18 +134,18 @@ lower_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, Inpu auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); - __bknd::__parallel_for(::std::forward(policy), + __bknd::__parallel_for(_BackendTag{}, ::std::forward(policy), custom_brick{comp, size}, value_size, zip_vw) .wait(); return result + value_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, - InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) +template +OutputIterator +upper_bound_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, + InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -158,18 +164,18 @@ upper_bound_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, Inpu auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); - __bknd::__parallel_for(::std::forward(policy), + __bknd::__parallel_for(_BackendTag{}, ::std::forward(policy), custom_brick{comp, size}, value_size, zip_vw) .wait(); return result + value_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, - InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) +template +OutputIterator +binary_search_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 start, InputIterator1 end, + InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { namespace __bknd = __par_backend_hetero; const auto size = ::std::distance(start, end); @@ -188,7 +194,7 @@ binary_search_impl(Policy&& policy, InputIterator1 start, InputIterator1 end, In auto keep_result = oneapi::dpl::__ranges::__get_sycl_range<__bknd::access_mode::read_write, OutputIterator>(); auto result_buf = keep_result(result, result + value_size); auto zip_vw = make_zip_view(input_buf.all_view(), value_buf.all_view(), result_buf.all_view()); - __bknd::__parallel_for(::std::forward(policy), + __bknd::__parallel_for(_BackendTag{}, ::std::forward(policy), custom_brick{comp, size}, value_size, zip_vw) .wait(); @@ -204,8 +210,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy lower_bound(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result) { - return internal::lower_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, - oneapi::dpl::__internal::__pstl_less()); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::lower_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, oneapi::dpl::__internal::__pstl_less()); } template lower_bound(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - return internal::lower_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, comp); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::lower_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, comp); } //Lower Bound end @@ -225,8 +236,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy upper_bound(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result) { - return internal::upper_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, - oneapi::dpl::__internal::__pstl_less()); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::upper_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, oneapi::dpl::__internal::__pstl_less()); } template upper_bound(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - return internal::upper_bound_impl(::std::forward(policy), start, end, value_start, value_end, result, comp); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::upper_bound_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, comp); } //Upper Bound end @@ -247,8 +263,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy binary_search(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result) { - return internal::binary_search_impl(::std::forward(policy), start, end, value_start, value_end, result, - oneapi::dpl::__internal::__pstl_less()); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::binary_search_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, oneapi::dpl::__internal::__pstl_less()); } template binary_search(Policy&& policy, InputIterator1 start, InputIterator1 end, InputIterator2 value_start, InputIterator2 value_end, OutputIterator result, StrictWeakOrdering comp) { - return internal::binary_search_impl(::std::forward(policy), start, end, value_start, value_end, result, - comp); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, start, value_start, result); + + return internal::binary_search_impl(__dispatch_tag, ::std::forward(policy), start, end, value_start, + value_end, result, comp); } //Binary search end diff --git a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h index 79d8239ea12..cd33872aa0d 100644 --- a/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/exclusive_scan_by_segment_impl.h @@ -37,12 +37,15 @@ class ExclusiveScan1; template class ExclusiveScan2; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op) +template +OutputIterator +pattern_exclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, + Operator binary_op) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + const auto n = ::std::distance(first1, last1); // Check for empty and single element ranges @@ -61,7 +64,7 @@ pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputI InputIterator2 last2 = first2 + n; // compute head flags - oneapi::dpl::__par_backend::__buffer _flags(n); + oneapi::dpl::__par_backend::__buffer _flags(policy, n); auto flags = _flags.get(); flags[0] = 1; @@ -69,7 +72,7 @@ pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputI oneapi::dpl::__internal::__not_pred(binary_pred)); // shift input one to the right and initialize segments with init - oneapi::dpl::__par_backend::__buffer _temp(n); + oneapi::dpl::__par_backend::__buffer _temp(policy, n); auto temp = _temp.get(); temp[0] = init; @@ -91,22 +94,24 @@ pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputI } #if _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -exclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, +template +OutputIterator +exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op, ::std::true_type /* has_known_identity*/) { - return internal::__scan_by_segment_impl_common(::std::forward(policy), first1, last1, first2, result, init, - binary_pred, binary_op, ::std::false_type{}); + return internal::__scan_by_segment_impl_common(__tag, ::std::forward(policy), first1, last1, first2, result, + init, binary_pred, binary_op, ::std::false_type{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -exclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, +template +OutputIterator +exclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op, ::std::false_type /* has_known_identity*/) { @@ -160,14 +165,15 @@ exclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIter return result + n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -pattern_exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op) +template +OutputIterator +pattern_exclusive_scan_by_segment(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op) { return internal::exclusive_scan_by_segment_impl( - ::std::forward(policy), first1, last1, first2, result, init, binary_pred, binary_op, + __tag, ::std::forward(policy), first1, last1, first2, result, init, binary_pred, binary_op, typename unseq_backend::__has_known_identity< Operator, typename ::std::iterator_traits::value_type>::type{}); } @@ -181,8 +187,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy exclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op) { - return internal::pattern_exclusive_scan_by_segment(::std::forward(policy), first1, last1, first2, result, - init, binary_pred, binary_op); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, first1, first2, result); + + return internal::pattern_exclusive_scan_by_segment(__dispatch_tag, ::std::forward(policy), first1, last1, + first2, result, init, binary_pred, binary_op); } template class InclusiveScan1; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy -pattern_inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) +OutputIterator +pattern_inclusive_scan_by_segment(_Tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, + BinaryOperator binary_op) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + const auto n = ::std::distance(first1, last1); // Check for empty and single element ranges @@ -56,7 +59,7 @@ pattern_inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputI typedef unsigned int FlagType; typedef typename ::std::iterator_traits::value_type ValueType; - oneapi::dpl::__par_backend::__buffer _mask(n); + oneapi::dpl::__par_backend::__buffer _mask(policy, n); auto mask = _mask.get(); mask[0] = 1; @@ -72,24 +75,26 @@ pattern_inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputI } #if _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -inclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op, +template +OutputIterator +inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, + BinaryPredicate binary_pred, BinaryOperator binary_op, ::std::true_type /* has_known_identity */) { using iter_value_t = typename ::std::iterator_traits::value_type; iter_value_t identity = unseq_backend::__known_identity; - return internal::__scan_by_segment_impl_common(::std::forward(policy), first1, last1, first2, result, + return internal::__scan_by_segment_impl_common(__tag, ::std::forward(policy), first1, last1, first2, result, identity, binary_pred, binary_op, ::std::true_type{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -inclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op, +template +OutputIterator +inclusive_scan_by_segment_impl(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, + BinaryPredicate binary_pred, BinaryOperator binary_op, ::std::false_type /* has_known_identity */) { @@ -123,14 +128,15 @@ inclusive_scan_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIter return result + n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -pattern_inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) +template +OutputIterator +pattern_inclusive_scan_by_segment(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, + BinaryPredicate binary_pred, BinaryOperator binary_op) { return internal::inclusive_scan_by_segment_impl( - ::std::forward(policy), first1, last1, first2, result, binary_pred, binary_op, + __tag, ::std::forward(policy), first1, last1, first2, result, binary_pred, binary_op, typename unseq_backend::__has_known_identity< BinaryOperator, typename ::std::iterator_traits::value_type>::type{}); } @@ -144,8 +150,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy inclusive_scan_by_segment(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryPredicate binary_pred, BinaryOperator binary_op) { - return internal::pattern_inclusive_scan_by_segment(::std::forward(policy), first1, last1, first2, result, - binary_pred, binary_op); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, first1, first2, result); + + return internal::pattern_inclusive_scan_by_segment(__dispatch_tag, ::std::forward(policy), first1, last1, + first2, result, binary_pred, binary_op); } template diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index 0102d6c2925..78986bca302 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -78,13 +78,15 @@ class Reduce3; template class Reduce4; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy> -reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, +::std::pair +reduce_by_segment_impl(_Tag, Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, BinaryOperator binary_op) { + static_assert(__internal::__is_host_dispatch_tag_v<_Tag>); + // The algorithm reduces values in [first2, first2 + (last1-first1)) where the associated // keys for the values are equal to the adjacent key. This function's implementation is a derivative work // and responsible for the second copyright notice in this header. @@ -112,7 +114,7 @@ reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 la // buffer that is used to store a flag indicating if the associated key is not equal to // the next key, and thus its associated sum should be part of the final result - oneapi::dpl::__par_backend::__buffer _mask(n + 1); + oneapi::dpl::__par_backend::__buffer _mask(policy, n + 1); auto mask = _mask.get(); mask[0] = 1; @@ -128,11 +130,11 @@ reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 la // buffer stores the sums of values associated with a given key. Sums are copied with // a shift into result2, and the shift is computed at the same time as the sums, so the // sums can't be written to result2 directly. - oneapi::dpl::__par_backend::__buffer _scanned_values(n); + oneapi::dpl::__par_backend::__buffer _scanned_values(policy, n); // Buffer is used to store results of the scan of the mask. Values indicate which position // in result2 needs to be written with the scanned_values element. - oneapi::dpl::__par_backend::__buffer _scanned_tail_flags(n); + oneapi::dpl::__par_backend::__buffer _scanned_tail_flags(policy, n); // Compute the sum of the segments. scanned_tail_flags values are not used. inclusive_scan(policy, make_zip_iterator(first2, _mask.get()), make_zip_iterator(first2, _mask.get()) + n, @@ -188,12 +190,12 @@ template using _SegReducePrefixPhase = __seg_reduce_prefix_kernel<_Name...>; } // namespace -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range3>> -__sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, - _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, +template +oneapi::dpl::__internal::__difference_t<_Range3> +__sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, + _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, + _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, ::std::false_type /* has_known_identity */) { return oneapi::dpl::experimental::ranges::reduce_by_segment( @@ -201,12 +203,12 @@ __sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& ::std::forward<_Range3>(__out_keys), ::std::forward<_Range4>(__out_values), __binary_pred, __binary_op); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range3>> -__sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, - _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, +template +oneapi::dpl::__internal::__difference_t<_Range3> +__sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __keys, + _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, + _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, ::std::true_type /* has_known_identity */) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -570,12 +572,12 @@ __sycl_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& return __end_idx.get_host_access()[0] + 1; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy> -reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator1 result1, OutputIterator2 result2, BinaryPred binary_pred, - BinaryOperator binary_op) +template +::std::pair +reduce_by_segment_impl(__internal::__hetero_tag<_BackendTag> __tag, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator1 result1, OutputIterator2 result2, + BinaryPred binary_pred, BinaryOperator binary_op) { // The algorithm reduces values in [first2, first2 + (last1-first1)) where the associated // keys for the values are equal to the adjacent key. @@ -609,9 +611,9 @@ reduce_by_segment_impl(Policy&& policy, InputIterator1 first1, InputIterator1 la typename ::std::iterator_traits::value_type>::type; // number of unique keys - _CountType __n = __sycl_reduce_by_segment(::std::forward(policy), key_buf.all_view(), value_buf.all_view(), - key_output_buf.all_view(), value_output_buf.all_view(), binary_pred, - binary_op, has_known_identity{}); + _CountType __n = __sycl_reduce_by_segment( + __tag, ::std::forward(policy), key_buf.all_view(), value_buf.all_view(), key_output_buf.all_view(), + value_output_buf.all_view(), binary_pred, binary_op, has_known_identity{}); return ::std::make_pair(result1 + __n, result2 + __n); } @@ -624,8 +626,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy(policy), first1, last1, first2, result1, result2, - binary_pred, binary_op); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(policy, first1, first2, result1, result2); + + return internal::reduce_by_segment_impl(__dispatch_tag, ::std::forward(policy), first1, last1, first2, + result1, result2, binary_pred, binary_op); } template using _SegScanPrefixPhase = __seg_scan_prefix_kernel<__is_inclusive, _Name...>; - template void - operator()(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_values, + operator()(_BackendTag, _ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op, _T __init, _T __identity) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -364,11 +364,12 @@ struct __sycl_scan_by_segment_impl } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy -__scan_by_segment_impl_common(Policy&& policy, InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, - OutputIterator result, T init, BinaryPredicate binary_pred, Operator binary_op, Inclusive) +template +OutputIterator +__scan_by_segment_impl_common(__internal::__hetero_tag<_BackendTag>, Policy&& policy, InputIterator1 first1, + InputIterator1 last1, InputIterator2 first2, OutputIterator result, T init, + BinaryPredicate binary_pred, Operator binary_op, Inclusive) { const auto n = ::std::distance(first1, last1); @@ -389,7 +390,7 @@ __scan_by_segment_impl_common(Policy&& policy, InputIterator1 first1, InputItera constexpr iter_value_t identity = unseq_backend::__known_identity; - __sycl_scan_by_segment_impl()(::std::forward(policy), key_buf.all_view(), + __sycl_scan_by_segment_impl()(_BackendTag{}, ::std::forward(policy), key_buf.all_view(), value_buf.all_view(), value_output_buf.all_view(), binary_pred, binary_op, init, identity); return result + n; diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 34d09140fb0..5af00e5425c 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -26,6 +26,11 @@ namespace dpl namespace __internal { +template +struct __parallel_tag; + +struct __parallel_forward_tag; + //------------------------------------------------------------------------ // any_of //------------------------------------------------------------------------ @@ -40,15 +45,13 @@ bool __brick_any_of(const _RandomAccessIterator, const _RandomAccessIterator, _Pred, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred, _IsVector, - /*parallel=*/::std::false_type) noexcept; +template +bool +__pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred, _IsVector, - /*parallel=*/::std::true_type); +template +bool +__pattern_any_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred); //------------------------------------------------------------------------ // walk1 (pseudo) @@ -64,34 +67,26 @@ template void __brick_walk1(_RandomAccessIterator, _RandomAccessIterator, _Function, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk1(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; +template +void +__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::true_type); +template +void +__pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_RandomAccessIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::true_type); +template +void +__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick, - /*parallel=*/::std::false_type) noexcept; +template +void +__pattern_walk_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Brick, - /*parallel=*/::std::true_type); +template +void +__pattern_walk_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Brick); //------------------------------------------------------------------------ // walk1_n @@ -105,25 +100,21 @@ template _RandomAccessIterator __brick_walk1_n(_RandomAccessIterator, _DifferenceType, _Function, /*vectorTag=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk1_n(_ExecutionPolicy&&, _ForwardIterator, _Size, _Function, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Function) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk1_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, _Function, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_walk1_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Function); -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk_brick_n(_ExecutionPolicy&&, _ForwardIterator, _Size, _Brick, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_walk_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Brick) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk_brick_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, _Brick, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_walk_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Brick); //------------------------------------------------------------------------ // walk2 (pseudo) @@ -147,65 +138,58 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_n(_ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, _Function, _IsVector, - /*parallel=*/::std::true_type); +template +_ForwardIterator2 +__pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Function); + +template +_ForwardIterator2 +__pattern_walk2(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _Function); + +template +_ForwardIterator2 +__pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Function) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, + _Function); + +template +_ForwardIterator2 +__pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _Brick) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Brick); template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Brick, - /*parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick, - /*parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2, _Brick, - /*parallel=*/::std::true_type); +_ForwardIterator2 +__pattern_walk2_brick(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, + _ForwardIterator2, _Brick); + +template +_ForwardIterator2 +__pattern_walk2_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick) noexcept; + +template +_RandomAccessIterator2 +__pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size, + _RandomAccessIterator2, _Brick); //------------------------------------------------------------------------ // walk3 (pseudo) @@ -222,50 +206,39 @@ _RandomAccessIterator3 __brick_walk3(_RandomAccessIterator1, _RandomAccessIterat _RandomAccessIterator3, _Function, /*vector=*/::std::true_type) noexcept; +template +_ForwardIterator3 +__pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3, + _Function) noexcept; + +template +_RandomAccessIterator3 +__pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator3, _Function); + template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3, - _Function, _IsVector, - /*parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, - __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>, - _RandomAccessIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, - !__is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>, - _RandomAccessIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::true_type); + class _Function> +_ForwardIterator3 +__pattern_walk3(__parallel_forward_tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator3, _Function); //------------------------------------------------------------------------ // transform_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __func, _IsVector __is_vector, - _IsParallel __is_parallel) noexcept; +template +_ForwardIterator2 +__pattern_walk2_transform_if(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _Function) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __func, - _IsVector __is_vector, _IsParallel __is_parallel) noexcept; +template +_ForwardIterator3 +__pattern_walk3_transform_if(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator3, _Function) noexcept; //------------------------------------------------------------------------ // equal @@ -279,17 +252,16 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _BinaryPredicate, - _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::true_type); +template +bool +__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); template bool __brick_equal(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate, @@ -299,17 +271,16 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, /* is_parallel = */ ::std::true_type); +template +bool +__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _BinaryPredicate); //------------------------------------------------------------------------ // find_if @@ -323,15 +294,14 @@ template _RandomAccessIterator __brick_find_if(_RandomAccessIterator, _RandomAccessIterator, _Predicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_find_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_find_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_find_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Predicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_find_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Predicate); //------------------------------------------------------------------------ // find_end @@ -347,19 +317,16 @@ _RandomAccessIterator1 __brick_find_end(_RandomAccessIterator1, _RandomAccessIte _RandomAccessIterator2, _BinaryPredicate, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_end(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator1 +__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_find_end(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator1 +__pattern_find_end(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); //------------------------------------------------------------------------ // find_first_of @@ -375,17 +342,16 @@ _RandomAccessIterator1 __brick_find_first_of(_RandomAccessIterator1, _RandomAcce _RandomAccessIterator2, _BinaryPredicate, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_first_of(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator1 +__pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_find_first_of(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator1 +__pattern_find_first_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); //------------------------------------------------------------------------ // search @@ -401,19 +367,16 @@ _RandomAccessIterator1 __brick_search(_RandomAccessIterator1, _RandomAccessItera _RandomAccessIterator2, _BinaryPredicate, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_search(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator1 +__pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_search(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator1 +__pattern_search(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate); //------------------------------------------------------------------------ // search_n @@ -429,31 +392,28 @@ _RandomAccessIterator __brick_search_n(_RandomAccessIterator, _RandomAccessIterator, _Size, const _Tp&, _BinaryPredicate, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_search_n(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Size, const _Tp&, _BinaryPredicate, - IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Size, const _Tp&, + _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_search_n(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Size, const _Tp&, - _BinaryPredicate, IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_search_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Size, + const _Tp&, _BinaryPredicate); //------------------------------------------------------------------------ // copy_n //------------------------------------------------------------------------ -template +template struct __brick_copy_n; -template +template struct __brick_copy; -template +template struct __brick_move; //------------------------------------------------------------------------ @@ -510,16 +470,16 @@ void __brick_partition_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator1, _OutputIterator2, bool*, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_copy_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryPredicate, _IsVector, - /*parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_copy_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _UnaryPredicate, - _IsVector, /*parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _UnaryPredicate); //------------------------------------------------------------------------ // count @@ -535,17 +495,14 @@ typename ::std::iterator_traits<_ForwardIterator>::difference_type __brick_count(_ForwardIterator, _ForwardIterator, _Predicate, /* is_vector = */ ::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> -__pattern_count(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate, - /* is_parallel */ ::std::false_type, _IsVector) noexcept; +template +typename ::std::iterator_traits<_ForwardIterator>::difference_type +__pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_RandomAccessIterator>::difference_type> -__pattern_count(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Predicate, - /* is_parallel */ ::std::true_type, _IsVector); +template +typename ::std::iterator_traits<_RandomAccessIterator>::difference_type +__pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Predicate); //------------------------------------------------------------------------ // unique @@ -559,15 +516,14 @@ template _RandomAccessIterator __brick_unique(_RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_unique(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_unique(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_unique(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _BinaryPredicate); //------------------------------------------------------------------------ // unique_copy @@ -581,11 +537,10 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_unique_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryPredicate, - _IsVector, /*parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _BinaryPredicate) noexcept; template _DifferenceType @@ -597,11 +552,11 @@ _DifferenceType __brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate, /*vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_unique_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _BinaryPredicate, _IsVector, /*parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _BinaryPredicate); //------------------------------------------------------------------------ // reverse @@ -623,15 +578,13 @@ template void __brick_reverse(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _IsVector, - /*is_parallel=*/::std::true_type); +template +void +__pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator); //------------------------------------------------------------------------ // reverse_copy @@ -645,15 +598,15 @@ template _OutputIterator __brick_reverse_copy(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_reverse_copy(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _OutputIterator, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _OutputIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_reverse_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2); //------------------------------------------------------------------------ // rotate @@ -667,40 +620,36 @@ template _RandomAccessIterator __brick_rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_rotate(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_rotate(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_rotate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator); //------------------------------------------------------------------------ // rotate_copy //------------------------------------------------------------------------ -template -_OutputIterator __brick_rotate_copy(_ForwardIterator, _ForwardIterator, _ForwardIterator, _OutputIterator, - /*__is_vector=*/::std::false_type) noexcept; +template +_OutputIterator __brick_rotate_copy(_Tag, _ForwardIterator, _ForwardIterator, _ForwardIterator, + _OutputIterator) noexcept; -template -_OutputIterator __brick_rotate_copy(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _OutputIterator, - /*__is_vector=*/::std::true_type) noexcept; +template +_OutputIterator __brick_rotate_copy(__parallel_tag<_IsVector>, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _OutputIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_rotate_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, _OutputIterator, - _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_rotate_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator, + _OutputIterator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_rotate_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _OutputIterator, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator2 +__pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2); //------------------------------------------------------------------------ // is_partitioned @@ -714,15 +663,14 @@ template bool __brick_is_partitioned(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +bool +__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +bool +__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); //------------------------------------------------------------------------ // partition @@ -736,15 +684,14 @@ template _RandomAccessIterator __brick_partition(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_partition(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_ForwardIterator +__pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_partition(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); //------------------------------------------------------------------------ // stable_partition @@ -758,16 +705,15 @@ template _RandomAccessIterator __brick_stable_partition(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_stable_partition(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _UnaryPredicate, - _IsVector, - /*is_parallelization=*/::std::false_type) noexcept; +template +_BidirectionalIterator +__pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_stable_partition(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallelization=*/::std::true_type); +template +_RandomAccessIterator +__pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); //------------------------------------------------------------------------ // partition_copy @@ -784,104 +730,88 @@ ::std::pair<_OutputIterator1, _OutputIterator2> __brick_partition_copy(_RandomAc _UnaryPredicate, /*is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_OutputIterator1, _OutputIterator2>> -__pattern_partition_copy(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator1, _OutputIterator2, - _UnaryPredicate, _IsVector, - /*is_parallelization=*/::std::false_type) noexcept; +template +::std::pair<_OutputIterator1, _OutputIterator2> +__pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator1, + _OutputIterator2, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_OutputIterator1, _OutputIterator2>> -__pattern_partition_copy(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator1, - _OutputIterator2, _UnaryPredicate, _IsVector, - /*is_parallelization=*/::std::true_type); +template +::std::pair<_RandomAccessIterator2, _RandomAccessIterator3> +__pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator3, _UnaryPredicate); //------------------------------------------------------------------------ // sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector /*is_vector*/, - /*is_parallel=*/::std::false_type, _IsMoveConstructible) noexcept; +template +void +__pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, + _IsMoveConstructible) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector /*is_vector*/, - /*is_parallel=*/::std::true_type, +template +void +__pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, /*is_move_constructible=*/::std::true_type); //------------------------------------------------------------------------ // stable_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector /*is_vector*/, - /*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector /*is_vector*/, - /*is_parallel=*/::std::true_type); +template +void +__pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); //------------------------------------------------------------------------ // sort_by_key //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::true_type); +template +void +__pattern_sort_by_key(_Tag, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, + _Compare) noexcept; + +template +void +__pattern_sort_by_key(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _Compare); //------------------------------------------------------------------------ // partial_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +void +__pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +void +__pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); //------------------------------------------------------------------------ // partial_sort_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_partial_sort_copy(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_RandomAccessIterator +__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_partial_sort_copy(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator +__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare) noexcept; //------------------------------------------------------------------------ // adjacent_find @@ -897,59 +827,54 @@ _ForwardIterator __brick_adjacent_find(_ForwardIterator, _ForwardIterator, _BinaryPredicate, /* IsVector = */ ::std::false_type, bool) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_adjacent_find(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, - /* is_parallel */ ::std::false_type, _IsVector, _Semantic) noexcept; +template +_ForwardIterator +__pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, + _Semantic) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_adjacent_find(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate, - /* is_parallel */ ::std::true_type, _IsVector, _Semantic); +template +_RandomAccessIterator +__pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _BinaryPredicate, _Semantic); //------------------------------------------------------------------------ // nth_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector, - /*is_parallel=*/::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, - _IsVector, - /*is_parallel=*/::std::true_type); +template +void +__pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, + _Compare) noexcept; + +template +void +__pattern_nth_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); //------------------------------------------------------------------------ // fill, fill_n //------------------------------------------------------------------------ -template +template struct __brick_fill; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_fill(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, const _Tp&, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; +template +void +__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, const _Tp&) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_fill(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, const _Tp&, - /*is_parallel=*/::std::true_type, _IsVector); +template +_RandomAccessIterator +__pattern_fill(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, const _Tp&); -template +template struct __brick_fill_n; -template +template _OutputIterator -__pattern_fill_n(_ExecutionPolicy&&, _OutputIterator, _Size, const _Tp&, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; +__pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator, _Size, const _Tp&) noexcept; -template +template _RandomAccessIterator -__pattern_fill_n(_ExecutionPolicy&&, _RandomAccessIterator, _Size, const _Tp&, - /*is_parallel=*/::std::true_type, _IsVector); +__pattern_fill_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, const _Tp&); //------------------------------------------------------------------------ // generate, generate_n @@ -963,15 +888,14 @@ template void __brick_generate(_ForwardIterator, _ForwardIterator, _Generator, /* is_vector = */ ::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_generate(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Generator, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; +template +void +__pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Generator) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_generate(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Generator, - /*is_parallel=*/::std::true_type, _IsVector); +template +_RandomAccessIterator +__pattern_generate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Generator); template _RandomAccessIterator __brick_generate_n(_RandomAccessIterator, Size, _Generator, @@ -981,15 +905,13 @@ template OutputIterator __brick_generate_n(OutputIterator, Size, _Generator, /* is_vector = */ ::std::false_type) noexcept; -template -OutputIterator -__pattern_generate_n(_ExecutionPolicy&&, OutputIterator, Size, _Generator, - /*is_parallel=*/::std::false_type, _IsVector) noexcept; +template +_OutputIterator +__pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator, _Size, _Generator) noexcept; -template +template _RandomAccessIterator -__pattern_generate_n(_ExecutionPolicy&&, _RandomAccessIterator, Size, _Generator, - /*is_parallel=*/::std::true_type, _IsVector); +__pattern_generate_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Generator); //------------------------------------------------------------------------ // remove @@ -1002,15 +924,14 @@ template _RandomAccessIterator __brick_remove_if(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_remove_if(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate, _IsVector, - /*is_parallel*/ ::std::false_type) noexcept; +template +_ForwardIterator +__pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_remove_if(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate, _IsVector, - /*is_parallel*/ ::std::true_type); +template +_RandomAccessIterator +__pattern_remove_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _UnaryPredicate); //------------------------------------------------------------------------ // merge @@ -1026,18 +947,17 @@ _OutputIterator __brick_merge(_RandomAccessIterator1, _RandomAccessIterator1, _R _RandomAccessIterator2, _OutputIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_merge(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _OutputIterator, _Compare, _IsVector, /* is_parallel = */ ::std::false_type) noexcept; +template +_OutputIterator +__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_merge(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +_RandomAccessIterator3 +__pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _RandomAccessIterator3, _Compare); //------------------------------------------------------------------------ // inplace_merge @@ -1051,34 +971,30 @@ template void __brick_inplace_merge(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, _BidirectionalIterator, - _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +void +__pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + _BidirectionalIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, - _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +void +__pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _RandomAccessIterator, _Compare); //------------------------------------------------------------------------ // includes //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +bool +__pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +bool +__pattern_includes(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _Compare); //------------------------------------------------------------------------ // set_union @@ -1094,17 +1010,17 @@ _OutputIterator __brick_set_union(_RandomAccessIterator1, _RandomAccessIterator1 _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::true_type); +template +_OutputIterator +__pattern_set_union(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare); //------------------------------------------------------------------------ // set_intersection @@ -1120,19 +1036,18 @@ _OutputIterator __brick_set_intersection(_RandomAccessIterator1, _RandomAccessIt _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator3 +__pattern_set_intersection(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _RandomAccessIterator3, _Compare); //------------------------------------------------------------------------ // set_difference @@ -1148,18 +1063,17 @@ _OutputIterator __brick_set_difference(_RandomAccessIterator1, _RandomAccessIter _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _OutputIterator, _Compare, _IsVector, /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator3 +__pattern_set_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _RandomAccessIterator3, _Compare); //------------------------------------------------------------------------ // set_symmetric_difference @@ -1175,19 +1089,18 @@ _OutputIterator __brick_set_symmetric_difference(_RandomAccessIterator1, _Random _RandomAccessIterator2, _OutputIterator, _Compare, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_OutputIterator +__pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _OutputIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, - _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare, _IsVector, - /*is_parallel=*/::std::true_type); +template +_RandomAccessIterator3 +__pattern_set_symmetric_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _RandomAccessIterator3, _Compare); //------------------------------------------------------------------------ // is_heap_until @@ -1201,15 +1114,14 @@ template _RandomAccessIterator __brick_is_heap_until(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +_RandomAccessIterator +__pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +_RandomAccessIterator +__pattern_is_heap_until(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); //------------------------------------------------------------------------ // is_heap @@ -1223,15 +1135,14 @@ template bool __brick_is_heap(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +bool +__pattern_is_heap(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); //------------------------------------------------------------------------ // min_element @@ -1245,15 +1156,14 @@ template _RandomAccessIterator __brick_min_element(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_min_element(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +_ForwardIterator +__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_min_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +_RandomAccessIterator +__pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); //------------------------------------------------------------------------ // minmax_element @@ -1268,17 +1178,14 @@ ::std::pair<_RandomAccessIterator, _RandomAccessIterator> __brick_minmax_element(_RandomAccessIterator, _RandomAccessIterator, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator, _ForwardIterator>> -__pattern_minmax_element(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +::std::pair<_ForwardIterator, _ForwardIterator> +__pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator, _RandomAccessIterator>> -__pattern_minmax_element(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +::std::pair<_RandomAccessIterator, _RandomAccessIterator> +__pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Compare); //------------------------------------------------------------------------ // mismatch @@ -1295,19 +1202,16 @@ ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> _Predicate, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator1, _ForwardIterator2>> -__pattern_mismatch(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, - _Predicate, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +::std::pair<_ForwardIterator1, _ForwardIterator2> +__pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, + _Predicate) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2>> -__pattern_mismatch(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _RandomAccessIterator2, _Predicate, _IsVector, /* is_parallel = */ ::std::true_type); +template +::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> +__pattern_mismatch(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, + _RandomAccessIterator2, _RandomAccessIterator2, _Predicate); //------------------------------------------------------------------------ // lexicographical_compare @@ -1323,24 +1227,21 @@ bool __brick_lexicographical_compare(_RandomAccessIterator1, _RandomAccessIterat _RandomAccessIterator2, _Compare, /* __is_vector = */ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator2, _Compare, _IsVector, - /* is_parallel = */ ::std::false_type) noexcept; +template +bool +__pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, + _ForwardIterator2, _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, - _RandomAccessIterator2, _RandomAccessIterator2, _Compare, _IsVector, - /* is_parallel = */ ::std::true_type); +template +bool +__pattern_lexicographical_compare(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, + _Compare) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_swap(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function, _IsVector, - _IsParallel); +template +_ForwardIterator2 +__pattern_swap(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function); //------------------------------------------------------------------------ // shift_left @@ -1356,23 +1257,24 @@ _ForwardIterator __brick_shift_left(_ForwardIterator, _ForwardIterator, typename ::std::iterator_traits<_ForwardIterator>::difference_type, /*__is_vector=*/::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, - typename ::std::iterator_traits<_ForwardIterator>::difference_type, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, - typename ::std::iterator_traits<_ForwardIterator>::difference_type, _IsVector, - /*is_parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_shift_right(_ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, - typename ::std::iterator_traits<_BidirectionalIterator>::difference_type, _IsVector, - _IsParallel is_parallel); +template +_ForwardIterator +__pattern_shift_left(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, + typename ::std::iterator_traits<_ForwardIterator>::difference_type) noexcept; + +template +_RandomAccessIterator +__pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + typename ::std::iterator_traits<_RandomAccessIterator>::difference_type); + +//------------------------------------------------------------------------ +// shift_right +//------------------------------------------------------------------------ + +template +_BidirectionalIterator +__pattern_shift_right(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator, + typename ::std::iterator_traits<_BidirectionalIterator>::difference_type); } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 7d3e07c4ec0..d0be4420f8c 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -63,23 +63,24 @@ __brick_any_of(const _RandomAccessIterator __first, const _RandomAccessIterator return __unseq_backend::__simd_or(__first, __last - __first, __pred); }; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred, - _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept +template +bool +__pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred) noexcept { - return __internal::__brick_any_of(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_any_of(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Pred __pred, - _IsVector __is_vector, /*parallel=*/::std::true_type) +template +bool +__pattern_any_of(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Pred __pred) { return __internal::__except_handler([&]() { - return __internal::__parallel_or(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __internal::__brick_any_of(__i, __j, __pred, __is_vector); + return __internal::__parallel_or(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__brick_any_of(__i, __j, __pred, _IsVector{}); }); }); } @@ -134,64 +135,66 @@ __brick_walk1(_DifferenceType __n, _Function __f, ::std::true_type) noexcept oneapi::dpl::__internal::__brick_walk1(__n, __f, ::std::false_type{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk1(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::false_type) noexcept +template +void +__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f) noexcept { - __internal::__brick_walk1(__first, __last, __f, __is_vector); + static_assert(__is_serial_tag_v<_Tag>); + + __internal::__brick_walk1(__first, __last, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f, - _IsVector __is_vector, - /*parallel=*/::std::true_type) +template +void +__pattern_walk1(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Function __f) { + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + + typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; + auto __func = [&__f](_ReferenceType arg) { __f(arg); }; __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__f, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __internal::__brick_walk1(__i, __j, __f, __is_vector); - }); + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __func); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, - !__is_random_access_iterator_v<_ForwardIterator>> -__pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f, _IsVector, - /*parallel=*/::std::true_type) +template +void +__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Function __f) { - typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - auto __func = [&__f](_ReferenceType arg) { __f(arg); }; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __func); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__f](_RandomAccessIterator __i, _RandomAccessIterator __j) { + __internal::__brick_walk1(__i, __j, __f, _IsVector{}); + }); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Brick __brick, - /*parallel=*/::std::false_type) noexcept +template +void +__pattern_walk_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Brick __brick) noexcept { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - __brick(__first, __last, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + __brick(__first, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Brick __brick, - /*parallel=*/::std::true_type) +template +void +__pattern_walk_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Brick __brick) { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__brick, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __brick(__i, __j, __is_vector); - }); + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j, _IsVector{}); }); }); } @@ -214,45 +217,45 @@ __brick_walk1_n(_RandomAccessIterator __first, _DifferenceType __n, _Function __ return __unseq_backend::__simd_walk_1(__first, __n, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk1_n(_ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f) noexcept { - return __internal::__brick_walk1_n(__first, __n, __f, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_walk1_n(__first, __n, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk1_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Function __f, - _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_walk1_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, + _Function __f) { - oneapi::dpl::__internal::__pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f, - __is_vector, ::std::true_type()); + oneapi::dpl::__internal::__pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, + __f); return __first + __n; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Brick __brick, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_walk_brick_n(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Brick __brick) noexcept { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - return __brick(__first, __n, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick(__first, __n, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_walk_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, _Brick __brick, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_walk_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n, + _Brick __brick) { - const auto __is_vector = __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, - [__brick, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __brick(__i, __j - __i, __is_vector); - }); + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, + [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j - __i, _IsVector{}); }); return __first + __n; }); } @@ -299,38 +302,41 @@ __brick_walk2_n(_RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterato return __unseq_backend::__simd_walk_2(__first1, __n, __first2, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _Function __f, _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept +template +_ForwardIterator2 +__pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Function __f) noexcept { - return __internal::__brick_walk2(__first1, __last1, __first2, __f, __is_vector); + static_assert(__is_serial_tag_v<_Tag>); + + return __internal::__brick_walk2(__first1, __last1, __first2, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Function __f) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__f, __first1, __first2, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__f, __first1, __first2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, _IsVector{}); }); return __first2 + (__last1 - __first1); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, _IsVector, /*parallel=*/::std::true_type) +template +_ForwardIterator2 +__pattern_walk2(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) { + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + return __internal::__except_handler([&]() { using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2>; auto __begin = _iterator_tuple(__first1, __first2); @@ -339,7 +345,7 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __begin, __end, + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __begin, __end, [&__f](::std::tuple<_ReferenceType1, _ReferenceType2> __val) { __f(::std::get<0>(__val), ::std::get<1>(__val)); }); @@ -352,51 +358,50 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_n(_ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f, - _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept +template +_ForwardIterator2 +__pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, + _Function __f) noexcept { - return __internal::__brick_walk2_n(__first1, __n, __first2, __f, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_walk2_n(__first1, __n, __first2, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, - _RandomAccessIterator2 __first2, _Function __f, _IsVector __is_vector, /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_walk2_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _Size __n, _RandomAccessIterator2 __first2, _Function __f) { - return __internal::__pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, - __f, __is_vector, ::std::true_type()); + return __internal::__pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + __first2, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick, /*parallel=*/::std::false_type) noexcept +template +_ForwardIterator2 +__pattern_walk2_brick(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Brick __brick) noexcept { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(__exec); - return __brick(__first1, __last1, __first2, __is_vector); + static_assert(__is_serial_tag_v<_Tag>); + + return __brick(__first1, __last1, __first2, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2>, - _RandomAccessIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_walk2_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Brick __brick) { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>( - __exec); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; return __except_handler([&]() { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [&__is_vector, __first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - __brick(__i, __j, __first2 + (__i - __first1), __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __brick(__i, __j, __first2 + (__i - __first1), _IsVector{}); }); return __first2 + (__last1 - __first1); }); @@ -404,11 +409,12 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1 //TODO: it postponed till adding more or less effective parallel implementation template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2>, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) +_ForwardIterator2 +__pattern_walk2_brick(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) { + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2>; auto __begin = _iterator_tuple(__first1, __first2); auto __end = _iterator_tuple(__last1, /*dummy parameter*/ _ForwardIterator2()); @@ -417,7 +423,7 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; return __except_handler([&]() { - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __begin, __end, + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __begin, __end, [__brick](::std::tuple<_ReferenceType1, _ReferenceType2> __val) { __brick(::std::get<0>(__val), ::std::forward<_ReferenceType2>(::std::get<1>(__val))); @@ -431,33 +437,33 @@ __pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Fo }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _Size __n, - _RandomAccessIterator2 __first2, _Brick __brick, /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _Size __n, _RandomAccessIterator2 __first2, _Brick __brick) { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, _RandomAccessIterator2>( - __exec); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; return __except_handler([&]() { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, - [&__is_vector, __first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - __brick(__i, __j - __i, __first2 + (__i - __first1), __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __brick(__i, __j - __i, __first2 + (__i - __first1), _IsVector{}); }); return __first2 + __n; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, - _Brick __brick, /*parallel=*/::std::false_type) noexcept +template +_ForwardIterator2 +__pattern_walk2_brick_n(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, + _ForwardIterator2 __first2, _Brick __brick) noexcept { - const auto __is_vector = - __internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>(__exec); - return __brick(__first1, __n, __first2, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick(__first1, __n, __first2, typename _Tag::__is_vector{}); } //------------------------------------------------------------------------ @@ -483,46 +489,45 @@ __brick_walk3(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ return __unseq_backend::__simd_walk_3(__first1, __last1 - __first1, __first2, __first3, __f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::false_type) noexcept +template +_ForwardIterator3 +__pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) noexcept { - return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, __is_vector); + static_assert(__is_serial_tag_v<_Tag>); + + return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, - __is_random_access_iterator_v<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3>, - _RandomAccessIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, _Function __f, _IsVector __is_vector, - /*parallel=*/::std::true_type) +template +_RandomAccessIterator3 +__pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3, + _Function __f) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__f, __first1, __first2, __first3, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__f, __first1, __first2, __first3](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f, - __is_vector); + _IsVector{}); }); return __first3 + (__last1 - __first1); }); } template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !__is_random_access_iterator_v<_ForwardIterator1, _ForwardIterator2, _ForwardIterator3>, - _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f, _IsVector, - /*parallel=*/::std::true_type) + class _Function> +_ForwardIterator3 +__pattern_walk3(__parallel_forward_tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) { + using __backend_tag = typename __parallel_forward_tag::__backend_tag; + return __internal::__except_handler([&]() { using _iterator_tuple = zip_forward_iterator<_ForwardIterator1, _ForwardIterator2, _ForwardIterator3>; auto __begin = _iterator_tuple(__first1, __first2, __first3); @@ -533,7 +538,7 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; typedef typename ::std::iterator_traits<_ForwardIterator3>::reference _ReferenceType3; - __par_backend::__parallel_for_each(::std::forward<_ExecutionPolicy>(__exec), __begin, __end, + __par_backend::__parallel_for_each(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __begin, __end, [&](::std::tuple<_ReferenceType1, _ReferenceType2, _ReferenceType3> __val) { __f(::std::get<0>(__val), ::std::get<1>(__val), ::std::get<2>(__val)); }); @@ -550,26 +555,28 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI // transform_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __func, _IsVector __is_vector, - _IsParallel __is_parallel) noexcept +template +_ForwardIterator2 +__pattern_walk2_transform_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __func) noexcept { - return __pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __func, __is_vector, - __is_parallel); + static_assert(__is_host_dispatch_tag_v<_Tag>); + + return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __func); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __func, - _IsVector __is_vector, _IsParallel __is_parallel) noexcept +template +_ForwardIterator3 +__pattern_walk3_transform_if(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, + _Function __func) noexcept { - return __pattern_walk3(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __first3, __func, - __is_vector, __is_parallel); + static_assert(__is_host_dispatch_tag_v<_Tag>); + + return __pattern_walk3(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __first3, + __func); } //------------------------------------------------------------------------ @@ -596,32 +603,32 @@ __brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ .first == __last1; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator2 __last2, _BinaryPredicate __p, _IsVector __is_vector, /* is_parallel = */ - ::std::false_type) noexcept +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __p) noexcept { - return __internal::__brick_equal(__first1, __last1, __first2, __last2, __p, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_equal(__first1, __last1, __first2, __last2, __p, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _BinaryPredicate __p, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +bool +__pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _BinaryPredicate __p) { if (__last1 - __first1 != __last2 - __first2) return false; return __internal::__except_handler([&]() { return !__internal::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - __p, __is_vector); + __p, _IsVector{}); }); }); } @@ -647,27 +654,27 @@ __brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ .first == __last1; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _BinaryPredicate __p, _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +bool +__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _BinaryPredicate __p) noexcept { - return __internal::__brick_equal(__first1, __last1, __first2, __p, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_equal(__first1, __last1, __first2, __p, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _BinaryPredicate __p, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +bool +__pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _BinaryPredicate __p) { return __internal::__except_handler([&]() { return !__internal::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - [__first1, __first2, __p, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, _IsVector{}); }); }); } @@ -694,27 +701,28 @@ __brick_find_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _Pr [&__pred](_RandomAccessIterator __it, _SizeType __i) { return __pred(__it[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_find_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, - _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_find_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Predicate __pred) noexcept { - return __internal::__brick_find_if(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_find_if(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_find_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Predicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Predicate __pred) { return __except_handler([&]() { - return __parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __brick_find_if(__i, __j, __pred, __is_vector); - }, - ::std::true_type{}); + return __parallel_find( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __brick_find_if(__i, __j, __pred, _IsVector{}); + }, + ::std::true_type{}); }); } @@ -836,40 +844,39 @@ __brick_find_end(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, return __find_subrange(__first, __last, __last, __s_first, __s_last, __pred, false, ::std::true_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_end(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, - _ForwardIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator1 +__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept { - return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_find_end(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator1 +__pattern_find_end(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, + _BinaryPredicate __pred) { if (__last - __first == __s_last - __s_first) { - const bool __res = __internal::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __s_first, __pred, __is_vector, ::std::true_type()); + const bool __res = __internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __s_first, __pred); return __res ? __first : __last; } else { return __internal::__except_handler([&]() { - return __internal::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __s_first, __s_last, __pred, - __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__find_subrange(__i, __j, __last, __s_first, - __s_last, __pred, false, - __is_vector); - }, - ::std::false_type{}); + return __internal::__parallel_find( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false, + _IsVector{}); + }, + ::std::false_type{}); }); } } @@ -893,28 +900,29 @@ __brick_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _Forw return __unseq_backend::__simd_find_first_of(__first, __last, __s_first, __s_last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_first_of(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, - _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator1 +__pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept { - return __internal::__brick_find_first_of(__first, __last, __s_first, __s_last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_find_first_of(__first, __last, __s_first, __s_last, __pred, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, - _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator1 +__pattern_find_first_of(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, + _RandomAccessIterator2 __s_last, _BinaryPredicate __pred) { return __internal::__except_handler([&]() { return __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__s_first, __s_last, &__pred, __is_vector](_ForwardIterator1 __i, _ForwardIterator1 __j) { - return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__s_first, __s_last, &__pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, _IsVector{}); }, ::std::true_type{}); }); @@ -939,41 +947,39 @@ __brick_search(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIter return __internal::__find_subrange(__first, __last, __last, __s_first, __s_last, __pred, true, ::std::true_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator1> -__pattern_search(_ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, - _ForwardIterator2 __s_last, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator1 +__pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last, + _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept { - return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator1> -__pattern_search(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, - _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator1 +__pattern_search(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, + _BinaryPredicate __pred) { if (__last - __first == __s_last - __s_first) { - const bool __res = __internal::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __s_first, __pred, __is_vector, ::std::true_type()); + const bool __res = __internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __s_first, __pred); return __res ? __first : __last; } else { return __internal::__except_handler([&]() { - return __internal::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __s_first, __s_last, __pred, - __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__find_subrange(__i, __j, __last, __s_first, - __s_last, __pred, true, - __is_vector); - }, - ::std::true_type{}); + return __internal::__parallel_find( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true, + _IsVector{}); + }, + /*_IsFirst=*/::std::true_type{}); }); } } @@ -997,38 +1003,36 @@ __brick_search_n(_RandomAccessIterator __first, _RandomAccessIterator __last, _S return __internal::__find_subrange(__first, __last, __last, __count, __value, __pred, ::std::true_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_search_n(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Size __count, - const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Size __count, + const _Tp& __value, _BinaryPredicate __pred) noexcept { - return __internal::__brick_search_n(__first, __last, __count, __value, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_search_n(__first, __last, __count, __value, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Size __count, const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_search_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { if (static_cast<_Size>(__last - __first) == __count) { - const bool __result = !__internal::__pattern_any_of( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); }, __is_vector, - /*is_parallel*/ ::std::true_type()); + const bool __result = + !__internal::__pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); }); return __result ? __first : __last; } else { - return __internal::__except_handler([&__exec, __first, __last, __count, &__value, __pred, __is_vector]() { + return __internal::__except_handler([__tag, &__exec, __first, __last, __count, &__value, __pred]() { return __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__last, __count, &__value, __pred, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__last, __count, &__value, __pred](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, _IsVector{}); }, ::std::true_type{}); }); @@ -1042,8 +1046,9 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ra // clear that doing so is worth the trouble and extra layers of call chain. // Sometimes a little duplication for sake of regularity is better than the alternative. -template -struct __brick_copy_n<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template +struct __brick_copy_n<_Tag, _ExecutionPolicy, + ::std::enable_if_t>> { template _RandomAccessIterator2 @@ -1067,10 +1072,9 @@ struct __brick_copy_n<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_hos // copy //------------------------------------------------------------------------ -template -struct __brick_copy<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template +struct __brick_copy<_Tag, _ExecutionPolicy, ::std::enable_if_t<__is_host_dispatch_tag_v<_Tag>>> { - template _RandomAccessIterator2 operator()(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, @@ -1100,10 +1104,9 @@ struct __brick_copy<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_ // move //------------------------------------------------------------------------ -template -struct __brick_move<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template +struct __brick_move<_Tag, _ExecutionPolicy, ::std::enable_if_t<__is_host_dispatch_tag_v<_Tag>>> { - template _RandomAccessIterator2 operator()(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, @@ -1120,14 +1123,17 @@ struct __brick_move<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_ { return ::std::move(__first, __last, __result); } -}; -template -struct __brick_move_destroy; + template + void + operator()(_ReferenceType1&& __val, _ReferenceType2&& __result) const + { + __result = ::std::move(__val); + } +}; -template -struct __brick_move_destroy<_ExecutionPolicy, - oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template >> +struct __brick_move_destroy { template _RandomAccessIterator2 @@ -1295,48 +1301,51 @@ __brick_partition_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_copy_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, - _UnaryPredicate __pred, _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept +template +_OutputIterator +__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, + _UnaryPredicate __pred) noexcept { - return __internal::__brick_copy_if(__first, __last, __result, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_copy_if(__first, __last, __result, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_copy_if(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __result, _UnaryPredicate __pred, _IsVector __is_vector, - /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; if (_DifferenceType(1) < __n) { - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); - return __internal::__except_handler([&__exec, __n, __first, __result, __is_vector, __pred, &__mask_buf]() { + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__exec, __n); + return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf]() { bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), - __mask + __i, __pred, __is_vector) + __mask + __i, __pred, _IsVector{}) .first; }, ::std::plus<_DifferenceType>(), // Combine [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan __internal::__brick_copy_by_mask( __first + __i, __first + (__i + __len), __result + __initial, __mask + __i, - [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, __is_vector); + [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{}); }, [&__m](_DifferenceType __total) { __m = __total; }); return __result + __m; }); } // trivial sequence - use serial algorithm - return __internal::__brick_copy_if(__first, __last, __result, __pred, __is_vector); + return __internal::__brick_copy_if(__first, __last, __result, __pred, _IsVector{}); } //------------------------------------------------------------------------ @@ -1358,22 +1367,22 @@ __brick_count(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pr return ::std::count_if(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> -__pattern_count(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, - /* is_parallel */ ::std::false_type, _IsVector __is_vector) noexcept +template +typename ::std::iterator_traits<_ForwardIterator>::difference_type +__pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) noexcept { - return __internal::__brick_count(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_count(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_RandomAccessIterator>::difference_type> -__pattern_count(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Predicate __pred, - /* is_parallel */ ::std::true_type, _IsVector __is_vector) +template +typename ::std::iterator_traits<_RandomAccessIterator>::difference_type +__pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Predicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType; //trivial pre-checks @@ -1382,9 +1391,10 @@ __pattern_count(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rando return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), - [__pred, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end, _SizeType __value) - -> _SizeType { return __value + __internal::__brick_count(__begin, __end, __pred, __is_vector); }, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), + [__pred](_RandomAccessIterator __begin, _RandomAccessIterator __end, _SizeType __value) -> _SizeType { + return __value + __internal::__brick_count(__begin, __end, __pred, _IsVector{}); + }, ::std::plus<_SizeType>()); }); } @@ -1410,32 +1420,36 @@ __brick_unique(_RandomAccessIterator __first, _RandomAccessIterator __last, _Bin return ::std::unique(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_unique(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _BinaryPredicate __pred) noexcept { - return __internal::__brick_unique(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_unique(__first, __last, __pred, typename _Tag::__is_vector{}); } // That function is shared between two algorithms - remove_if (__pattern_remove_if) and unique (pattern unique). But a mask calculation is different. // So, a caller passes _CalcMask brick into remove_elements. -template -_ForwardIterator -__remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _CalcMask __calc_mask, - _IsVector __is_vector) +template +_RandomAccessIterator +__remove_elements(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _CalcMask __calc_mask) { - typedef typename ::std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType; - typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _Tp; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; _DifferenceType __n = __last - __first; - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__exec, __n); // 1. find a first iterator that should be removed return __internal::__except_handler([&]() { bool* __mask = __mask_buf.get(); _DifferenceType __min = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, - [__first, __mask, &__calc_mask, __is_vector](_DifferenceType __i, _DifferenceType __j, - _DifferenceType __local_min) -> _DifferenceType { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, + [__first, __mask, &__calc_mask](_DifferenceType __i, _DifferenceType __j, + _DifferenceType __local_min) -> _DifferenceType { // Create mask __calc_mask(__mask + __i, __mask + __j, __first + __i); @@ -1445,8 +1459,8 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI return __local_min; } // find first iterator that should be removed - bool* __result = __internal::__brick_find_if(__mask + __i, __mask + __j, - [](bool __val) { return !__val; }, __is_vector); + bool* __result = __internal::__brick_find_if( + __mask + __i, __mask + __j, [](bool __val) { return !__val; }, _IsVector{}); if (__result - __mask == __j) { return __local_min; @@ -1465,45 +1479,45 @@ __remove_elements(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardI __n -= __min; __first += __min; - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n); + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__exec, __n); _Tp* __result = __buf.get(); __mask += __min; _DifferenceType __m{}; // 2. Elements that doesn't satisfy pred are moved to result __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), - [__mask, __is_vector](_DifferenceType __i, _DifferenceType __len) { - return __internal::__brick_count(__mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, - __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + [__mask](_DifferenceType __i, _DifferenceType __len) { + return __internal::__brick_count( + __mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, _IsVector{}); }, ::std::plus<_DifferenceType>(), [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { __internal::__brick_copy_by_mask( __first + __i, __first + __i + __len, __result + __initial, __mask + __i, - [](_ForwardIterator __x, _Tp* __z) { + [](_RandomAccessIterator __x, _Tp* __z) { if constexpr (::std::is_trivial_v<_Tp>) *__z = ::std::move(*__x); else ::new (::std::addressof(*__z)) _Tp(::std::move(*__x)); }, - __is_vector); + _IsVector{}); }, [&__m](_DifferenceType __total) { __m = __total; }); // 3. Elements from result are moved to [first, last) - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, - [__result, __first, __is_vector](_Tp* __i, _Tp* __j) { - __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __result), - __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + __m, [__result, __first](_Tp* __i, _Tp* __j) { + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __i, __j, __first + (__i - __result), _IsVector{}); }); return __first + __m; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_unique(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _BinaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_unique(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _BinaryPredicate __pred) { typedef typename ::std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; @@ -1514,16 +1528,15 @@ __pattern_unique(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand if (__first + 1 == __last || __first + 2 == __last) { // Trivial sequence - use serial algorithm - return __internal::__brick_unique(__first, __last, __pred, __is_vector); + return __internal::__brick_unique(__first, __last, __pred, _IsVector{}); } return __internal::__remove_elements( - ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, - [&__pred, __is_vector](bool* __b, bool* __e, _RandomAccessIterator __it) { + __tag, ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, + [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { __internal::__brick_walk3( __b, __e, __it - 1, __it, - [&__pred](bool& __x, _ReferenceType __y, _ReferenceType __z) { __x = !__pred(__y, __z); }, __is_vector); - }, - __is_vector); + [&__pred](bool& __x, _ReferenceType __y, _ReferenceType __z) { __x = !__pred(__y, __z); }, _IsVector{}); + }); } //------------------------------------------------------------------------ @@ -1550,13 +1563,14 @@ __brick_unique_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __las #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_unique_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, - _BinaryPredicate __pred, _IsVector __is_vector, /*parallel=*/::std::false_type) noexcept +template +_OutputIterator +__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __result, _BinaryPredicate __pred) noexcept { - return __internal::__brick_unique_copy(__first, __last, __result, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_unique_copy(__first, __last, __result, __pred, typename _Tag::__is_vector{}); } template @@ -1581,25 +1595,26 @@ __brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, return __unseq_backend::__simd_calc_mask_2(__first, __last - __first, __mask, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __result, _BinaryPredicate __pred, _IsVector __is_vector, - /*parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _BinaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; if (_DifferenceType(2) < __n) { - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__exec, __n); if (_DifferenceType(2) < __n) { - return __internal::__except_handler([&__exec, __n, __first, __result, __pred, __is_vector, &__mask_buf]() { + return __internal::__except_handler([&__exec, __n, __first, __result, __pred, &__mask_buf]() { bool* __mask = __mask_buf.get(); _DifferenceType __m{}; __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0), [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce _DifferenceType __extra = 0; if (__i == 0) @@ -1612,7 +1627,7 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, ++__extra; } return __internal::__brick_calc_mask_2<_DifferenceType>(__first + __i, __first + (__i + __len), - __mask + __i, __pred, __is_vector) + + __mask + __i, __pred, _IsVector{}) + __extra; }, ::std::plus<_DifferenceType>(), // Combine @@ -1620,7 +1635,7 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, // Phase 2 is same as for __pattern_copy_if __internal::__brick_copy_by_mask( __first + __i, __first + (__i + __len), __result + __initial, __mask + __i, - [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, __is_vector); + [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{}); }, [&__m](_DifferenceType __total) { __m = __total; }); return __result + __m; @@ -1628,7 +1643,7 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, } } // trivial sequence - use serial algorithm - return __internal::__brick_unique_copy(__first, __last, __result, __pred, __is_vector); + return __internal::__brick_unique_copy(__first, __last, __result, __pred, _IsVector{}); } //------------------------------------------------------------------------ @@ -1685,27 +1700,29 @@ __brick_reverse(_RandomAccessIterator __first, _RandomAccessIterator __last, _Ra }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, - _IsVector _is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +void +__pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last) noexcept { - __internal::__brick_reverse(__first, __last, _is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + __internal::__brick_reverse(__first, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +void +__pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + if (__first == __last) return; __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, - [__is_vector, __first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { - __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, + [__first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { + __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), _IsVector{}); }); } @@ -1733,31 +1750,34 @@ __brick_reverse_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __la __d_first, [](_ReferenceType1 __x, _ReferenceType2 __y) { __y = __x; }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_reverse_copy(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, - _OutputIterator __d_first, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +_OutputIterator +__pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, + _OutputIterator __d_first) noexcept { - return __internal::__brick_reverse_copy(__first, __last, __d_first, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_reverse_copy(__first, __last, __d_first, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_reverse_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __d_first, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + auto __len = __last - __first; if (__len == 0) return __d_first; - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__is_vector, __first, __len, __d_first](_RandomAccessIterator1 __inner_first, - _RandomAccessIterator1 __inner_last) { - __internal::__brick_reverse_copy(__inner_first, __inner_last, - __d_first + (__len - (__inner_last - __first)), - __is_vector); - }); + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __len, __d_first](_RandomAccessIterator1 __inner_first, _RandomAccessIterator1 __inner_last) { + __internal::__brick_reverse_copy(__inner_first, __inner_last, + __d_first + (__len - (__inner_last - __first)), _IsVector{}); + }); return __d_first + __len; } @@ -1813,43 +1833,47 @@ __brick_rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _R return __ret; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_rotate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, + _ForwardIterator __last) noexcept { - return __internal::__brick_rotate(__first, __middle, __last, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_rotate(__first, __middle, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_rotate(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; auto __n = __last - __first; auto __m = __middle - __first; if (__m <= __n / 2) { - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n - __m); - return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() { + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__exec, __n - __m); + return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, &__buf]() { _Tp* __result = __buf.get(); - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, - [__middle, __result, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __middle), __is_vector); - }); - - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, - [__last, __middle, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_move<_ExecutionPolicy>{}(__b, __e, __b + (__last - __middle), __is_vector); - }); - - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __result, __result + (__n - __m), - [__first, __result, __is_vector](_Tp* __b, _Tp* __e) { - __brick_move_destroy<_ExecutionPolicy>{}( - __b, __e, __first + (__b - __result), __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, + [__middle, __result](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_uninitialized_move( + __b, __e, __result + (__b - __middle), _IsVector{}); + }); + + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, + [__last, __middle](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __b, __e, __b + (__last - __middle), _IsVector{}); + }); + + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + (__n - __m), [__first, __result](_Tp* __b, _Tp* __e) { + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __b, __e, __first + (__b - __result), _IsVector{}); }); return __first + (__last - __middle); @@ -1857,25 +1881,25 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand } else { - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__m); - return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, __is_vector, &__buf]() { + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__exec, __m); + return __internal::__except_handler([&__exec, __n, __m, __first, __middle, __last, &__buf]() { _Tp* __result = __buf.get(); - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, - [__first, __result, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __first), __is_vector); - }); - - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, - [__first, __middle, __is_vector](_RandomAccessIterator __b, _RandomAccessIterator __e) { - __internal::__brick_move<_ExecutionPolicy>{}(__b, __e, __first + (__b - __middle), __is_vector); - }); - - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __result, __result + __m, - [__n, __m, __first, __result, __is_vector](_Tp* __b, _Tp* __e) { - __brick_move_destroy<_ExecutionPolicy>{}( - __b, __e, __first + ((__n - __m) + (__b - __result)), __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, + [__first, __result](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_uninitialized_move( + __b, __e, __result + (__b - __first), _IsVector{}); + }); + + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __middle, __last, + [__first, __middle](_RandomAccessIterator __b, _RandomAccessIterator __e) { + __internal::__brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __b, __e, __first + (__b - __middle), _IsVector{}); + }); + + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __result, + __result + __m, [__n, __m, __first, __result](_Tp* __b, _Tp* __e) { + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __b, __e, __first + ((__n - __m) + (__b - __result)), _IsVector{}); }); return __first + (__last - __middle); @@ -1887,59 +1911,64 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Rand // rotate_copy //------------------------------------------------------------------------ -template +template _OutputIterator -__brick_rotate_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last, - _OutputIterator __result, /*__is_vector=*/::std::false_type) noexcept +__brick_rotate_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle, + _ForwardIterator __last, _OutputIterator __result) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + return ::std::rotate_copy(__first, __middle, __last, __result); } -template +template _RandomAccessIterator2 -__brick_rotate_copy(_ExecutionPolicy&&, _RandomAccessIterator1 __first, _RandomAccessIterator1 __middle, - _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, - /*__is_vector=*/::std::true_type) noexcept +__brick_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __middle, _RandomAccessIterator1 __last, + _RandomAccessIterator2 __result) noexcept { - _RandomAccessIterator2 __res = __brick_copy<_ExecutionPolicy>{}(__middle, __last, __result, ::std::true_type()); - return __internal::__brick_copy<_ExecutionPolicy>{}(__first, __middle, __res, ::std::true_type()); + _RandomAccessIterator2 __res = + __brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}(__middle, __last, __result); + return __internal::__brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}(__first, __middle, __res); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, - _ForwardIterator __last, _OutputIterator __result, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_OutputIterator +__pattern_rotate_copy(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, + _ForwardIterator __last, _OutputIterator __result) noexcept { - return __internal::__brick_rotate_copy(::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, - __result, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_rotate_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, + __result); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_rotate_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __middle, - _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __middle, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __last, __middle, __result, __is_vector](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { - __internal::__brick_copy<_ExecutionPolicy> __copy{}; + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __last, __middle, __result](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { + __internal::__brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy> __copy{}; if (__b > __middle) { - __copy(__b, __e, __result + (__b - __middle), __is_vector); + __copy(__b, __e, __result + (__b - __middle), _IsVector{}); } else { _RandomAccessIterator2 __new_result = __result + ((__last - __middle) + (__b - __first)); if (__e < __middle) { - __copy(__b, __e, __new_result, __is_vector); + __copy(__b, __e, __new_result, _IsVector{}); } else { - __copy(__b, __middle, __new_result, __is_vector); - __copy(__middle, __e, __result, __is_vector); + __copy(__b, __middle, __new_result, _IsVector{}); + __copy(__middle, __e, __result, _IsVector{}); } } }); @@ -1985,19 +2014,23 @@ __brick_is_partitioned(_RandomAccessIterator __first, _RandomAccessIterator __la } } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +bool +__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept { - return __internal::__brick_is_partitioned(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_is_partitioned(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +bool +__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + //trivial pre-checks if (__first == __last) return true; @@ -2035,9 +2068,9 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs const _ReduceType __identity{__not_init, __last}; _ReduceType __result = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, - [&__pred, __combine, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j, - _ReduceType __value) -> _ReduceType { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __identity, + [&__pred, __combine](_RandomAccessIterator __i, _RandomAccessIterator __j, + _ReduceType __value) -> _ReduceType { if (__value.__val == __broken) return _ReduceType{__broken, __i}; @@ -2047,11 +2080,11 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs { // find first element that don't satisfy pred _RandomAccessIterator __x = - __internal::__brick_find_if(__i + 1, __j, __not_pred<_UnaryPredicate&>(__pred), __is_vector); + __internal::__brick_find_if(__i + 1, __j, __not_pred<_UnaryPredicate&>(__pred), _IsVector{}); if (__x != __j) { // find first element after "x" that satisfy pred - _RandomAccessIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, __is_vector); + _RandomAccessIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, _IsVector{}); // if it was found then range isn't partitioned by pred if (__y != __j) return _ReduceType{__broken, __i}; @@ -2065,7 +2098,7 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs { // if first element doesn't satisfy pred // then we should find the first element that satisfy pred. // If we found it then range isn't partitioned by pred - if (__internal::__brick_find_if(__i + 1, __j, __pred, __is_vector) != __j) + if (__internal::__brick_find_if(__i + 1, __j, __pred, _IsVector{}) != __j) return _ReduceType{__broken, __i}; __res = _ReduceType{__all_false, __i}; @@ -2111,19 +2144,22 @@ __brick_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _ return ::std::partition(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_partition(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept { - return __internal::__brick_partition(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_partition(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; // partitioned range: elements before pivot satisfy pred (true part), // elements after pivot don't satisfy pred (false part) @@ -2138,7 +2174,7 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R _PartitionRange __init{__last, __last, __last}; // lambda for merging two partitioned ranges to one partitioned range - auto __reductor = [&__exec, __is_vector](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { + auto __reductor = [&__exec](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { auto __size1 = __val1.__end - __val1.__pivot; auto __size2 = __val2.__pivot - __val2.__begin; auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); @@ -2153,10 +2189,10 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R else if (__size2 > __size1) { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, - [__val1, __val2, __size1, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1, + [__val1, __val2, __size1](_RandomAccessIterator __i, _RandomAccessIterator __j) { __internal::__brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot), - __is_vector); + _IsVector{}); }); return {__new_begin, __val2.__pivot - __size1, __val2.__end}; } @@ -2164,20 +2200,20 @@ __pattern_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _R else { __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, - [__val1, __val2, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), __is_vector); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2, + [__val1, __val2](_RandomAccessIterator __i, _RandomAccessIterator __j) { + __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), _IsVector{}); }); return {__new_begin, __val1.__pivot + __size2, __val2.__end}; } }; _PartitionRange __result = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, - [__pred, __is_vector, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, - _PartitionRange __value) -> _PartitionRange { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + [__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, + _PartitionRange __value) -> _PartitionRange { //1. serial partition - _RandomAccessIterator __pivot = __internal::__brick_partition(__i, __j, __pred, __is_vector); + _RandomAccessIterator __pivot = __internal::__brick_partition(__i, __j, __pred, _IsVector{}); // 2. merging of two ranges (left and right respectively) return __reductor(__value, {__i, __pivot, __j}); @@ -2208,21 +2244,23 @@ __brick_stable_partition(_RandomAccessIterator __first, _RandomAccessIterator __ return ::std::stable_partition(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_stable_partition(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, - /*is_parallelization=*/::std::false_type) noexcept +template +_BidirectionalIterator +__pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last, + _UnaryPredicate __pred) noexcept { - return __internal::__brick_stable_partition(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_stable_partition(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, - /*is_parallelization=*/::std::true_type) +template +_RandomAccessIterator +__pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + // partitioned range: elements before pivot satisfy pred (true part), // elements after pivot don't satisfy pred (false part) struct _PartitionRange @@ -2236,7 +2274,7 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __fi _PartitionRange __init{__last, __last, __last}; // lambda for merging two partitioned ranges to one partitioned range - auto __reductor = [__is_vector](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { + auto __reductor = [](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange { auto __size1 = __val1.__end - __val1.__pivot; auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); @@ -2249,17 +2287,17 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _RandomAccessIterator __fi // then we should swap the false part of left range and last part of true part of right range else { - __internal::__brick_rotate(__val1.__pivot, __val2.__begin, __val2.__pivot, __is_vector); + __internal::__brick_rotate(__val1.__pivot, __val2.__begin, __val2.__pivot, _IsVector{}); return {__new_begin, __val2.__pivot - __size1, __val2.__end}; } }; _PartitionRange __result = __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, - [&__pred, __is_vector, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, - _PartitionRange __value) -> _PartitionRange { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, + [&__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j, + _PartitionRange __value) -> _PartitionRange { //1. serial stable_partition - _RandomAccessIterator __pivot = __internal::__brick_stable_partition(__i, __j, __pred, __is_vector); + _RandomAccessIterator __pivot = __internal::__brick_stable_partition(__i, __j, __pred, _IsVector{}); // 2. merging of two ranges (left and right respectively) return __reductor(__value, {__i, __pivot, __j}); @@ -2295,40 +2333,42 @@ __brick_partition_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __ #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_OutputIterator1, _OutputIterator2>> -__pattern_partition_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator1 __out_true, _OutputIterator2 __out_false, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallelization=*/::std::false_type) noexcept +template +::std::pair<_OutputIterator1, _OutputIterator2> +__pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator1 __out_true, _OutputIterator2 __out_false, _UnaryPredicate __pred) noexcept { - return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator2, _RandomAccessIterator3>> -__pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __out_true, _RandomAccessIterator3 __out_false, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallelization=*/::std::true_type) +template +::std::pair<_RandomAccessIterator2, _RandomAccessIterator3> +__pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __out_true, + _RandomAccessIterator3 __out_false, _UnaryPredicate __pred) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; typedef ::std::pair<_DifferenceType, _DifferenceType> _ReturnType; const _DifferenceType __n = __last - __first; if (_DifferenceType(1) < __n) { - __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__n); - return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __is_vector, __pred, - &__mask_buf]() { + __par_backend::__buffer<_ExecutionPolicy, bool> __mask_buf(__exec, __n); + return __internal::__except_handler([&__exec, __n, __first, __out_true, __out_false, __pred, &__mask_buf]() { bool* __mask = __mask_buf.get(); _ReturnType __m{}; __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n, ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, + ::std::make_pair(_DifferenceType(0), _DifferenceType(0)), [=](_DifferenceType __i, _DifferenceType __len) { // Reduce return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len), - __mask + __i, __pred, __is_vector); + __mask + __i, __pred, _IsVector{}); }, [](const _ReturnType& __x, const _ReturnType& __y) -> _ReturnType { return ::std::make_pair(__x.first + __y.first, __x.second + __y.second); @@ -2336,39 +2376,45 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir [=](_DifferenceType __i, _DifferenceType __len, _ReturnType __initial) { // Scan __internal::__brick_partition_by_mask(__first + __i, __first + (__i + __len), __out_true + __initial.first, __out_false + __initial.second, - __mask + __i, __is_vector); + __mask + __i, _IsVector{}); }, [&__m](_ReturnType __total) { __m = __total; }); return ::std::make_pair(__out_true + __m.first, __out_false + __m.second); }); } // trivial sequence - use serial algorithm - return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __is_vector); + return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, _IsVector{}); } //------------------------------------------------------------------------ // sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector /*is_vector*/, /*is_parallel=*/::std::false_type, _IsMoveConstructible) noexcept +template +void +__pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + _IsMoveConstructible) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + ::std::sort(__first, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type, /*is_move_constructible=*/::std::true_type) +template +void +__pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp, + /*is_move_constructible=*/::std::true_type) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [](_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { ::std::sort(__first, __last, __comp); }, - __last - __first); + __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + ::std::sort(__first, __last, __comp); + }, + __last - __first); }); } @@ -2376,24 +2422,30 @@ __pattern_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Random // stable_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector /*is_vector*/, /*is_parallel=*/::std::false_type) noexcept +template +void +__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + ::std::stable_sort(__first, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector /*is_vector*/, /*is_parallel=*/::std::true_type) +template +void +__pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - [](_RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp) { ::std::stable_sort(__first, __last, __comp); }, - __last - __first); + __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + [](_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + ::std::stable_sort(__first, __last, __comp); + }, + __last - __first); }); } @@ -2401,41 +2453,47 @@ __pattern_stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, // sort_by_key //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::false_type) noexcept +template +void +__pattern_sort_by_key(_Tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, + _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, + _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); auto __end = __beg + (__keys_last - __keys_first); - auto __cmp_f = - [__comp](const auto& __a, const auto& __b) { return __comp(::std::get<0>(__a), ::std::get<0>(__b)); }; + auto __cmp_f = [__comp](const auto& __a, const auto& __b) { + return __comp(::std::get<0>(__a), ::std::get<0>(__b)); + }; ::std::sort(__beg, __end, __cmp_f); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, - _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp, - _IsVector /*vector=*/, /*is_parallel=*/::std::true_type) +template +void +__pattern_sort_by_key(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, + _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp) { - static_assert(::std::is_move_constructible_v::value_type> - && ::std::is_move_constructible_v::value_type>, + static_assert( + ::std::is_move_constructible_v::value_type> && + ::std::is_move_constructible_v::value_type>, "The keys and values should be move constructible in case of parallel execution."); auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); auto __end = __beg + (__keys_last - __keys_first); - auto __cmp_f = - [__comp](const auto& __a, const auto& __b) { return __comp(::std::get<0>(__a), ::std::get<0>(__b)); }; + auto __cmp_f = [__comp](const auto& __a, const auto& __b) { + return __comp(::std::get<0>(__a), ::std::get<0>(__b)); + }; + + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; __internal::__except_handler([&]() { - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, - [](auto __first, auto __last, auto __cmp) - { ::std::sort(__first, __last, __cmp); },__end - __beg); + __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __cmp_f, + [](auto __first, auto __last, auto __cmp) { ::std::sort(__first, __last, __cmp); }, __end - __beg); }); } @@ -2443,27 +2501,30 @@ __pattern_sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_f // partial_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept +template +void +__pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __middle, + _RandomAccessIterator __last, _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + ::std::partial_sort(__first, __middle, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _Compare __comp, _IsVector, /*is_parallel=*/::std::true_type) +template +void +__pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + const auto __n = __middle - __first; if (__n == 0) return; __except_handler([&]() { __par_backend::__parallel_stable_sort( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, [__n](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Compare __comp) { if (__n < __end - __begin) ::std::partial_sort(__begin, __begin + __n, __end, __comp); @@ -2478,22 +2539,25 @@ __pattern_partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, // partial_sort_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_partial_sort_copy(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept +template +_RandomAccessIterator +__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + return ::std::partial_sort_copy(__first, __last, __d_first, __d_last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __d_first, _RandomAccessIterator2 __d_last, _Compare __comp, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_partial_sort_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, + _RandomAccessIterator2 __d_last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + if (__last == __first || __d_last == __d_first) { return __d_first; @@ -2504,14 +2568,13 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ if (__n2 >= __n1) { __par_backend::__parallel_stable_sort( - ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, - [__first, __d_first, __is_vector](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, - _Compare __comp) { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp, + [__first, __d_first](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, _Compare __comp) { _RandomAccessIterator1 __i1 = __first + (__i - __d_first); _RandomAccessIterator1 __j1 = __first + (__j - __d_first); // 1. Copy elements from input to output - __brick_copy<_ExecutionPolicy>{}(__i1, __j1, __i, __is_vector); + __brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}(__i1, __j1, __i, _IsVector{}); // 2. Sort elements in output sequence ::std::sort(__i, __j, __comp); }, @@ -2522,38 +2585,39 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __ { typedef typename ::std::iterator_traits<_RandomAccessIterator1>::value_type _T1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::value_type _T2; - __par_backend::__buffer<_ExecutionPolicy, _T1> __buf(__n1); + __par_backend::__buffer<_ExecutionPolicy, _T1> __buf(__exec, __n1); _T1* __r = __buf.get(); - __par_backend::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, - [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { - _RandomAccessIterator1 __it = __first + (__i - __r); + __par_backend::__parallel_stable_sort( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp, + [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp) { + _RandomAccessIterator1 __it = __first + (__i - __r); - // 1. Copy elements from input to raw memory - for (_T1* __k = __i; __k != __j; ++__k, ++__it) - { - ::new (__k) _T2(*__it); - } + // 1. Copy elements from input to raw memory + for (_T1* __k = __i; __k != __j; ++__k, ++__it) + { + ::new (__k) _T2(*__it); + } - // 2. Sort elements in temporary buffer - if (__n2 < __j - __i) - ::std::partial_sort(__i, __i + __n2, __j, __comp); - else - ::std::sort(__i, __j, __comp); - }, - __n2); + // 2. Sort elements in temporary buffer + if (__n2 < __j - __i) + ::std::partial_sort(__i, __i + __n2, __j, __comp); + else + ::std::sort(__i, __j, __comp); + }, + __n2); // 3. Move elements from temporary buffer to output - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, - [__r, __d_first, __is_vector](_T1* __i, _T1* __j) { - __brick_move_destroy<_ExecutionPolicy>{}( - __i, __j, __d_first + (__i - __r), __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2, + [__r, __d_first](_T1* __i, _T1* __j) { + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __i, __j, __d_first + (__i - __r), _IsVector{}); }); if constexpr (!::std::is_trivially_destructible_v<_T1>) - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __r + __n2, __r + __n1, - [__is_vector](_T1* __i, _T1* __j) { __brick_destroy(__i, __j, __is_vector); }); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r + __n2, + __r + __n1, + [](_T1* __i, _T1* __j) { __brick_destroy(__i, __j, _IsVector{}); }); return __d_first + __n2; } @@ -2579,28 +2643,31 @@ __brick_adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _Binary return ::std::adjacent_find(__first, __last, __pred); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_adjacent_find(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred, - /* is_parallel */ ::std::false_type, _IsVector __is_vector, _Semantic) noexcept +template +_ForwardIterator +__pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _BinaryPredicate __pred, _Semantic) noexcept { - return __internal::__brick_adjacent_find(__first, __last, __pred, __is_vector, _Semantic::value); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_adjacent_find(__first, __last, __pred, typename _Tag::__is_vector{}, _Semantic::value); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _BinaryPredicate __pred, /* is_parallel */ ::std::true_type, _IsVector __is_vector, - _Semantic __or_semantic) +template +_RandomAccessIterator +__pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _BinaryPredicate __pred, _Semantic __or_semantic) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + if (__last - __first < 2) return __last; return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, - [__last, __pred, __is_vector, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, - _RandomAccessIterator __value) -> _RandomAccessIterator { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, + [__last, __pred, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end, + _RandomAccessIterator __value) -> _RandomAccessIterator { // TODO: investigate performance benefits from the use of shared variable for the result, // checking (compare_and_swap idiom) its __value at __first. if (__or_semantic && __value < __last) @@ -2619,7 +2686,7 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first //correct the global result iterator if the "brick" returns a local "__last" const _RandomAccessIterator __res = - __internal::__brick_adjacent_find(__begin, __end, __pred, __is_vector, __or_semantic); + __internal::__brick_adjacent_find(__begin, __end, __pred, _IsVector{}, __or_semantic); if (__res < __end) __value = __res; } @@ -2636,20 +2703,20 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _RandomAccessIterator __first // nth_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __nth, - _RandomAccessIterator __last, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept +template +void +__pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __nth, + _RandomAccessIterator __last, _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + ::std::nth_element(__first, __nth, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth, - _RandomAccessIterator __last, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +void +__pattern_nth_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) { if (__first == __last || __nth == __last) { @@ -2661,10 +2728,8 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __x; do { - __x = __internal::__pattern_partition(::std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, - [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }, - __is_vector, - /*is_parallel=*/::std::true_type()); + __x = __internal::__pattern_partition(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, + [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }); --__x; if (__x != __first) { @@ -2692,8 +2757,8 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, //------------------------------------------------------------------------ // fill, fill_n //------------------------------------------------------------------------ -template -struct __brick_fill<_Tp, _ExecutionPolicy, oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template +struct __brick_fill<_Tag, _ExecutionPolicy, _Tp, ::std::enable_if_t<__is_host_dispatch_tag_v<_Tag>>> { const _Tp& __value; @@ -2714,33 +2779,34 @@ struct __brick_fill<_Tp, _ExecutionPolicy, oneapi::dpl::__internal::__enable_if_ } }; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_fill(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept +template +void +__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) noexcept { - __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__first, __last, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + __internal::__brick_fill<_Tag, _ExecutionPolicy, _Tp>{__value}(__first, __last, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_fill(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - const _Tp& __value, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) +template +_RandomAccessIterator +__pattern_fill(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, const _Tp& __value) { - return __internal::__except_handler([&__exec, __first, __last, &__value, __is_vector]() { - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__value, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end) { - __internal::__brick_fill<_Tp, _ExecutionPolicy>{__value}(__begin, __end, __is_vector); - }); + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + return __internal::__except_handler([&__exec, __first, __last, &__value]() { + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__value](_RandomAccessIterator __begin, _RandomAccessIterator __end) { + __internal::__brick_fill<__parallel_tag<_IsVector>, _ExecutionPolicy, _Tp>{ + __value}(__begin, __end, _IsVector{}); + }); return __last; }); } -template -struct __brick_fill_n<_Tp, _ExecutionPolicy, - oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy>> +template +struct __brick_fill_n<_Tag, _ExecutionPolicy, _Tp, ::std::enable_if_t<__is_host_dispatch_tag_v<_Tag>>> { const _Tp& __value; @@ -2761,21 +2827,23 @@ struct __brick_fill_n<_Tp, _ExecutionPolicy, } }; -template +template _OutputIterator -__pattern_fill_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, const _Tp& __value, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept +__pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, const _Tp& __value) noexcept { - return __internal::__brick_fill_n<_Tp, _ExecutionPolicy>{__value}(__first, __count, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_fill_n<_Tag, _ExecutionPolicy, _Tp>{__value}(__first, __count, + typename _Tag::__is_vector{}); } -template +template _RandomAccessIterator -__pattern_fill_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __count, const _Tp& __value, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) +__pattern_fill_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _Size __count, const _Tp& __value) { - return __internal::__pattern_fill(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __value, - ::std::true_type(), __is_vector); + return __internal::__pattern_fill(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, + __value); } //------------------------------------------------------------------------ @@ -2797,24 +2865,26 @@ __brick_generate(_ForwardIterator __first, _ForwardIterator __last, _Generator _ ::std::generate(__first, __last, __g); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_generate(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Generator __g, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept +template +void +__pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Generator __g) noexcept { - __internal::__brick_generate(__first, __last, __g, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + __internal::__brick_generate(__first, __last, __g, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_generate(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Generator __g, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) +template +_RandomAccessIterator +__pattern_generate(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Generator __g) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__g, __is_vector](_RandomAccessIterator __begin, _RandomAccessIterator __end) { - __internal::__brick_generate(__begin, __end, __g, __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__g](_RandomAccessIterator __begin, _RandomAccessIterator __end) { + __internal::__brick_generate(__begin, __end, __g, _IsVector{}); }); return __last; }); @@ -2835,23 +2905,24 @@ __brick_generate_n(OutputIterator __first, Size __count, _Generator __g, /* is_v return ::std::generate_n(__first, __count, __g); } -template +template _OutputIterator -__pattern_generate_n(_ExecutionPolicy&&, _OutputIterator __first, _Size __count, _Generator __g, - /*is_parallel=*/::std::false_type, _IsVector __is_vector) noexcept +__pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, _Generator __g) noexcept { - return __internal::__brick_generate_n(__first, __count, __g, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_generate_n(__first, __count, __g, typename _Tag::__is_vector{}); } -template +template _RandomAccessIterator -__pattern_generate_n(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __count, _Generator __g, - /*is_parallel=*/::std::true_type, _IsVector __is_vector) +__pattern_generate_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _Size __count, _Generator __g) { static_assert(__is_random_access_iterator_v<_RandomAccessIterator>, "Pattern-brick error. Should be a random access iterator."); - return __internal::__pattern_generate(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __g, - ::std::true_type(), __is_vector); + return __internal::__pattern_generate(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, + __g); } //------------------------------------------------------------------------ @@ -2878,34 +2949,35 @@ __brick_remove_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _ #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_remove_if(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, - _IsVector __is_vector, /*is_parallel*/ ::std::false_type) noexcept +template +_ForwardIterator +__pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _UnaryPredicate __pred) noexcept { - return __internal::__brick_remove_if(__first, __last, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_remove_if(__first, __last, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_remove_if(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryPredicate __pred, _IsVector __is_vector, /*is_parallel*/ ::std::true_type) +template +_RandomAccessIterator +__pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _UnaryPredicate __pred) { typedef typename ::std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType; if (__first == __last || __first + 1 == __last) { // Trivial sequence - use serial algorithm - return __internal::__brick_remove_if(__first, __last, __pred, __is_vector); + return __internal::__brick_remove_if(__first, __last, __pred, _IsVector{}); } return __internal::__remove_elements( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [&__pred, __is_vector](bool* __b, bool* __e, _RandomAccessIterator __it) { - __internal::__brick_walk2(__b, __e, __it, [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); }, - __is_vector); - }, - __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it) { + __internal::__brick_walk2( + __b, __e, __it, [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); }, _IsVector{}); + }); } //------------------------------------------------------------------------ @@ -2931,29 +3003,34 @@ __brick_merge(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _ return ::std::merge(__first1, __last1, __first2, __last2, __d_first, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_merge(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, - _ForwardIterator2 __last2, _OutputIterator __d_first, _Compare __comp, _IsVector __is_vector, - /* is_parallel = */ ::std::false_type) noexcept +template +_OutputIterator +__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __d_first, + _Compare __comp) noexcept { - return __internal::__brick_merge(__first1, __last1, __first2, __last2, __d_first, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_merge(__first1, __last1, __first2, __last2, __d_first, __comp, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __d_first, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +_RandomAccessIterator3 +__pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __d_first, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + __par_backend::__parallel_merge( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, - [__is_vector](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, - _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, _Compare __comp) { - return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, __is_vector); - }); + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, + __comp, + [](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, + _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, + _Compare __comp) { return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, _IsVector{}); }); return __d_first + (__last1 - __first1) + (__last2 - __first2); } @@ -2977,28 +3054,31 @@ __brick_inplace_merge(_RandomAccessIterator __first, _RandomAccessIterator __mid ::std::inplace_merge(__first, __middle, __last, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __middle, - _BidirectionalIterator __last, _Compare __comp, _IsVector __is_vector, - /* is_parallel = */ ::std::false_type) noexcept +template +void +__pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __middle, + _BidirectionalIterator __last, _Compare __comp) noexcept { - __internal::__brick_inplace_merge(__first, __middle, __last, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + __internal::__brick_inplace_merge(__first, __middle, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, - _RandomAccessIterator __last, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +void +__pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + if (__first == __last || __first == __middle || __middle == __last) { return; } + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _Tp; auto __n = __last - __first; - __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__n); + __par_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__exec, __n); _Tp* __r = __buf.get(); __internal::__except_handler([&]() { auto __move_values = [](_RandomAccessIterator __x, _Tp* __z) { @@ -3009,11 +3089,11 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first }; auto __move_sequences = [](_RandomAccessIterator __first1, _RandomAccessIterator __last1, _Tp* __first2) { - return __internal::__brick_uninitialized_move(__first1, __last1, __first2, _IsVector()); + return __internal::__brick_uninitialized_move(__first1, __last1, __first2, _IsVector{}); }; __par_backend::__parallel_merge( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r, __comp, [__n, __move_values, __move_sequences](_RandomAccessIterator __f1, _RandomAccessIterator __l1, _RandomAccessIterator __f2, _RandomAccessIterator __l2, _Tp* __f3, _Compare __comp) { @@ -3021,10 +3101,11 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first __move_sequences, __move_sequences); return __f3 + (__l1 - __f1) + (__l2 - __f2); }); - __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, [__r, __first, __is_vector](_Tp* __i, _Tp* __j) { - __brick_move_destroy<_ExecutionPolicy>{}(__i, __j, __first + (__i - __r), __is_vector); - }); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __r, __r + __n, + [__r, __first](_Tp* __i, _Tp* __j) { + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __i, __j, __first + (__i - __r), _IsVector{}); + }); }); } @@ -3032,21 +3113,22 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _RandomAccessIterator __first // includes //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, _IsVector, - /*is_parallel=*/::std::false_type) noexcept +template +bool +__pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + return ::std::includes(__first1, __last1, __first2, __last2, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Compare __comp, _IsVector, - /*is_parallel=*/::std::true_type) +template +bool +__pattern_includes(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Compare __comp) { if (__first2 == __last2) return true; @@ -3068,14 +3150,14 @@ __pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ return __internal::__except_handler([&]() { return !__internal::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j) { assert(__j > __i); //assert(__j - __i > 1); //1. moving boundaries to "consume" subsequence of equal elements auto __is_equal_sorted = [&__comp](_RandomAccessIterator2 __a, _RandomAccessIterator2 __b) -> bool { - //enough one call of __comp due to compared couple belongs to one sorted sequience + //enough one call of __comp due to compared couple belongs to one sorted sequence return !__comp(*__a, *__b); }; @@ -3105,14 +3187,16 @@ __pattern_includes(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _ inline constexpr auto __set_algo_cut_off = 1000; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _SizeFunction __size_func, _SetOP __set_op, _IsVector __is_vector) +template +_OutputIterator +__parallel_set_op(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _OutputIterator __result, _Compare __comp, _SizeFunction __size_func, _SetOP __set_op) { - typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; typedef typename ::std::iterator_traits<_OutputIterator>::value_type _T; struct _SetRange @@ -3128,23 +3212,23 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar const _DifferenceType __n1 = __last1 - __first1; const _DifferenceType __n2 = __last2 - __first2; - __par_backend::__buffer<_ExecutionPolicy, _T> __buf(__size_func(__n1, __n2)); + __par_backend::__buffer<_ExecutionPolicy, _T> __buf(__exec, __size_func(__n1, __n2)); - return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, __is_vector, - __comp, __size_func, __set_op, &__buf]() { + return __internal::__except_handler([&__exec, __n1, __first1, __last1, __first2, __last2, __result, __comp, + __size_func, __set_op, &__buf]() { auto __tmp_memory = __buf.get(); _DifferenceType __m{}; auto __scan = [=](_DifferenceType, _DifferenceType, const _SetRange& __s) { // Scan if (!__s.empty()) - __brick_move_destroy<_ExecutionPolicy>{}(__tmp_memory + __s.__buf_pos, - __tmp_memory + (__s.__buf_pos + __s.__len), - __result + __s.__pos, __is_vector); + __brick_move_destroy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __tmp_memory + __s.__buf_pos, __tmp_memory + (__s.__buf_pos + __s.__len), __result + __s.__pos, + _IsVector{}); }; __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, - [=](_DifferenceType __i, _DifferenceType __len) { // Reduce + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0}, + [=](_DifferenceType __i, _DifferenceType __len) { // Reduce //[__b; __e) - a subrange of the first sequence, to reduce - _ForwardIterator1 __b = __first1 + __i, __e = __first1 + (__i + __len); + _RandomAccessIterator1 __b = __first1 + __i, __e = __first1 + (__i + __len); //try searching for the first element which not equal to *__b if (__b != __first1) @@ -3157,7 +3241,7 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar //check is [__b; __e) empty if (__e - __b < 1) { - _ForwardIterator2 __bb = __last2; + _RandomAccessIterator2 __bb = __last2; if (__b != __last1) __bb = ::std::lower_bound(__first2, __last2, *__b, __comp); @@ -3166,11 +3250,11 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar } //try searching for "corresponding" subrange [__bb; __ee) in the second sequence - _ForwardIterator2 __bb = __first2; + _RandomAccessIterator2 __bb = __first2; if (__b != __first1) __bb = ::std::lower_bound(__first2, __last2, *__b, __comp); - _ForwardIterator2 __ee = __last2; + _RandomAccessIterator2 __ee = __last2; if (__e != __last1) __ee = ::std::lower_bound(__bb, __last2, *__e, __comp); @@ -3196,64 +3280,66 @@ __parallel_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwar } //a shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference' -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _SetUnionOp __set_union_op, _IsVector __is_vector) +template +_OutputIterator +__parallel_set_union_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _OutputIterator __result, _Compare __comp, _SetUnionOp __set_union_op) { - typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType; + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + + typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; - __brick_copy<_ExecutionPolicy> __copy_range{}; + __brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy> __copy_range{}; // {1} {}: parallel copying just first sequence if (__n2 == 0) - return __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __copy_range, ::std::true_type()); + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __copy_range); // {} {2}: parallel copying justmake second sequence if (__n1 == 0) - return __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, __result, - __copy_range, ::std::true_type()); + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result, __copy_range); // testing whether the sequences are intersected - _ForwardIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); + _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); if (__left_bound_seq_1 == __last1) { //{1} < {2}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __copy_range); }, [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, - __result + __n1, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result + __n1, __copy_range); }); return __result + __n1 + __n2; } // testing whether the sequences are intersected - _ForwardIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); + _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); if (__left_bound_seq_2 == __last2) { //{2} < {1}: seq2 is wholly greater than seq1, so, do parallel copying seq1 and seq2 __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, __result, - __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, __last2, + __result, __copy_range); }, [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - __result + __n2, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result + __n2, __copy_range); }); return __result + __n1 + __n2; } @@ -3264,17 +3350,17 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ auto __res_or = __result; __result += __m1; //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), //do parallel copying of [first1; left_bound_seq_1) [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, - __left_bound_seq_1, __res_or, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __left_bound_seq_1, __res_or, __copy_range); }, [=, &__result] { __result = __internal::__parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, - __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op); }); return __result; } @@ -3286,24 +3372,24 @@ __parallel_set_union_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ auto __res_or = __result; __result += __m2; //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) __par_backend::__parallel_invoke( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), //do parallel copying of [first2; left_bound_seq_2) [=] { - __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first2, - __left_bound_seq_2, __res_or, __copy_range, ::std::true_type()); + __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first2, + __left_bound_seq_2, __res_or, __copy_range); }, [=, &__result] { __result = __internal::__parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, - __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, + __set_union_op); }); return __result; } return __internal::__parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op, __is_vector); + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op); } //------------------------------------------------------------------------ @@ -3340,25 +3426,26 @@ __brick_set_union(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last return ::std::set_union(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_OutputIterator +__pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, + _Compare __comp) noexcept { - return __internal::__brick_set_union(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_set_union(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*__is_parallel=*/::std::true_type) +template +_OutputIterator +__pattern_set_union(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _OutputIterator __result, _Compare __comp) { - const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -3368,13 +3455,12 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, typedef typename ::std::iterator_traits<_OutputIterator>::value_type _Tp; return __parallel_set_union_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_union_construct(__first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - __is_vector); + }); } //------------------------------------------------------------------------ @@ -3401,23 +3487,25 @@ __brick_set_intersection(_RandomAccessIterator1 __first1, _RandomAccessIterator1 return ::std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, +template +_OutputIterator +__pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept + _Compare __comp) noexcept { - return __internal::__brick_set_intersection(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_set_intersection(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, - _RandomAccessIterator3 __result, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator3 +__pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) { typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; @@ -3446,14 +3534,13 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f { //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) return __internal::__parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, __result, __comp); - }, - __is_vector); + }); } const auto __m2 = __last2 - __left_bound_seq_2 + __n1; @@ -3461,14 +3548,13 @@ __pattern_set_intersection(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f { //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) __result = __internal::__parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, __comp, - [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, + __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, __result, __comp); - }, - __is_vector); + }); return __result; } @@ -3499,23 +3585,25 @@ __brick_set_difference(_RandomAccessIterator1 __first1, _RandomAccessIterator1 _ return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, +template +_OutputIterator +__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept + _Compare __comp) noexcept { - return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, - _RandomAccessIterator3 __result, _Compare __comp, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator3 +__pattern_set_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp) { typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType; @@ -3529,33 +3617,32 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __fir // {1} \ {}: parallel copying just first sequence if (__n2 == 0) - return __pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + return __pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, + __internal::__brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}); // testing whether the sequences are intersected _RandomAccessIterator1 __left_bound_seq_1 = ::std::lower_bound(__first1, __last1, *__first2, __comp); //{1} < {2}: seq 2 is wholly greater than seq 1, so, parallel copying just first sequence if (__left_bound_seq_1 == __last1) - return __pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + return __pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, + __internal::__brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}); // testing whether the sequences are intersected _RandomAccessIterator2 __left_bound_seq_2 = ::std::lower_bound(__first2, __last2, *__first1, __comp); //{2} < {1}: seq 1 is wholly greater than seq 2, so, parallel copying just first sequence if (__left_bound_seq_2 == __last2) - return __internal::__pattern_walk2_brick(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result, - __brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + return __internal::__pattern_walk2_brick(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __result, __brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy>{}); if (__n1 + __n2 > __set_algo_cut_off) return __parallel_set_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_DifferenceType __n, _DifferenceType) { return __n; }, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_difference_construct(__first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - __is_vector); + }); // use serial algorithm return ::std::set_difference(__first1, __last1, __first2, __last2, __result, __comp); @@ -3585,26 +3672,27 @@ __brick_set_symmetric_difference(_RandomAccessIterator1 __first1, _RandomAccessI return ::std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, +template +_OutputIterator +__pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept + _Compare __comp) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + return __internal::__brick_set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp, - __is_vector); + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator3> -__pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, - _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp, - _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator3 +__pattern_set_symmetric_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, + _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __result, _Compare __comp) { - const auto __n1 = __last1 - __first1; const auto __n2 = __last2 - __first2; @@ -3614,13 +3702,12 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _RandomAccessItera typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; return __internal::__parallel_set_union_op( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { return oneapi::dpl::__utils::__set_symmetric_difference_construct( __first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }, - __is_vector); + }); } //------------------------------------------------------------------------ @@ -3646,12 +3733,14 @@ __brick_is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __las [&__comp](_RandomAccessIterator __it, _SizeType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +_RandomAccessIterator +__pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept { - return __internal::__brick_is_heap_until(__first, __last, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_is_heap_until(__first, __last, __comp, typename _Tag::__is_vector{}); } template @@ -3676,18 +3765,18 @@ __is_heap_until_local(_RandomAccessIterator __first, _DifferenceType __begin, _D [&__comp](_RandomAccessIterator __it, _DifferenceType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +_RandomAccessIterator +__pattern_is_heap_until(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { return __internal::__except_handler([&]() { - return __parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { - return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, - __comp, __is_vector); - }, - ::std::true_type{}); + return __parallel_find( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, __comp, _IsVector{}); + }, + ::std::true_type{}); }); } @@ -3732,24 +3821,26 @@ __is_heap_local(_RandomAccessIterator __first, _DifferenceType __begin, _Differe }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +bool +__pattern_is_heap(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, + _Compare __comp) noexcept { - return __internal::__brick_is_heap(__first, __last, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_is_heap(__first, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +bool +__pattern_is_heap(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { return __internal::__except_handler([&]() { - return !__parallel_or(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __comp, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j) { + return !__parallel_or(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) { return !__internal::__is_heap_local(__first, __i - __first, __j - __first, __comp, - __is_vector); + _IsVector{}); }); }); } @@ -3778,30 +3869,34 @@ __brick_min_element(_RandomAccessIterator __first, _RandomAccessIterator __last, #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_min_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +_ForwardIterator +__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Compare __comp) noexcept { - return __internal::__brick_min_element(__first, __last, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_min_element(__first, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_min_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +_RandomAccessIterator +__pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + // a trivial case pre-check if (__last - __first < 2) return __first; return __internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ __last, [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, _RandomAccessIterator __init) -> _RandomAccessIterator { const _RandomAccessIterator __subresult = - __internal::__brick_min_element(__begin, __end, __comp, __is_vector); + __internal::__brick_min_element(__begin, __end, __comp, _IsVector{}); return __init == __last ? __subresult : __internal::__cmp_iterators_by_values(__init, __subresult, __comp, oneapi::dpl::__internal::__pstl_less()); @@ -3841,21 +3936,23 @@ __brick_minmax_element(_RandomAccessIterator __first, _RandomAccessIterator __la #endif } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator, _ForwardIterator>> -__pattern_minmax_element(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +::std::pair<_ForwardIterator, _ForwardIterator> +__pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _Compare __comp) noexcept { - return __internal::__brick_minmax_element(__first, __last, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_minmax_element(__first, __last, __comp, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator, _RandomAccessIterator>> -__pattern_minmax_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +::std::pair<_RandomAccessIterator, _RandomAccessIterator> +__pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + // a trivial case pre-check if (__last - __first < 2) return ::std::make_pair(__first, __first); @@ -3864,10 +3961,10 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs typedef ::std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result; return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, /*identity*/ ::std::make_pair(__last, __last), [=, &__comp](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> _Result { - const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, __is_vector); + const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, _IsVector{}); if (__init.first == __last) // = identity return __subresult; return ::std::make_pair( @@ -3920,31 +4017,30 @@ __brick_mismatch(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1 return __unseq_backend::__simd_first(__first1, __n, __first2, __not_pred<_Predicate&>(__pred)); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_ForwardIterator1, _ForwardIterator2>> -__pattern_mismatch(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Predicate __pred, _IsVector __is_vector, - /* is_parallel = */ ::std::false_type) noexcept +template +::std::pair<_ForwardIterator1, _ForwardIterator2> +__pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Predicate __pred) noexcept { - return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, - ::std::pair<_RandomAccessIterator1, _RandomAccessIterator2>> -__pattern_mismatch(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Predicate __pred, - _IsVector __is_vector, /* is_parallel = */ ::std::true_type) +template +::std::pair<_RandomAccessIterator1, _RandomAccessIterator2> +__pattern_mismatch(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Predicate __pred) { return __internal::__except_handler([&]() { auto __n = ::std::min(__last1 - __first1, __last2 - __first2); auto __result = __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, - [__first1, __first2, __pred, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - __pred, __is_vector) + __pred, _IsVector{}) .first; }, ::std::true_type{}); @@ -4002,22 +4098,24 @@ __brick_lexicographical_compare(_RandomAccessIterator1 __first1, _RandomAccessIt } } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, - _IsVector __is_vector, /* is_parallel = */ ::std::false_type) noexcept +template +bool +__pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) noexcept { - return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, - _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _Compare __comp, _IsVector __is_vector, - /* is_parallel = */ ::std::true_type) +template +bool +__pattern_lexicographical_compare(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, + _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, + _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _Compare __comp) noexcept { if (__first2 == __last2) { // if second sequence is empty @@ -4035,13 +4133,14 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterat --__last2; auto __n = ::std::min(__last1 - __first1, __last2 - __first2); auto __result = __internal::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, - [__first1, __first2, &__comp, __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - [&__comp](const _RefType1 __x, const _RefType2 __y) { - return !__comp(__x, __y) && !__comp(__y, __x); - }, - __is_vector) + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, &__comp](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__brick_mismatch( + __i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), + [&__comp](const _RefType1 __x, const _RefType2 __y) { + return !__comp(__x, __y) && !__comp(__y, __x); + }, + _IsVector{}) .first; }, ::std::true_type{}); @@ -4061,14 +4160,14 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _RandomAccessIterat // swap //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_swap(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, _IsVector __is_vector, _IsParallel __is_parallel) +template +_ForwardIterator2 +__pattern_swap(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, + _ForwardIterator2 __first2, _Function __f) { - return __pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f, __is_vector, - __is_parallel); + static_assert(__is_host_dispatch_tag_v<_Tag>); + + return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f); } //------------------------------------------------------------------------ @@ -4139,21 +4238,24 @@ __brick_shift_left(_ForwardIterator __first, _ForwardIterator __last, return __first + __size_res; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - typename ::std::iterator_traits<_ForwardIterator>::difference_type __n, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_ForwardIterator +__pattern_shift_left(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + typename ::std::iterator_traits<_ForwardIterator>::difference_type __n) noexcept { - return __brick_shift_left(__first, __last, __n, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick_shift_left(__first, __last, __n, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - typename ::std::iterator_traits<_ForwardIterator>::difference_type __n, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator +__pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, + typename ::std::iterator_traits<_RandomAccessIterator>::difference_type __n) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. if (__n <= 0) return __last; @@ -4161,7 +4263,7 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa if (__n >= __size) return __first; - using _DiffType = typename ::std::iterator_traits<_ForwardIterator>::difference_type; + using _DiffType = typename ::std::iterator_traits<_RandomAccessIterator>::difference_type; _DiffType __mid = __size / 2 + __size % 2; _DiffType __size_res = __size - __n; @@ -4169,10 +4271,10 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa //1. n >= size/2; there is enough memory to 'total' parallel copying if (__n >= __mid) { - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __n, __size, - [__first, __n, __is_vector](_DiffType __i, _DiffType __j) { - __brick_move<_ExecutionPolicy>{}(__first + __i, __first + __j, - __first + __i - __n, __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __size, + [__first, __n](_DiffType __i, _DiffType __j) { + __brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __first + __i, __first + __j, __first + __i - __n, _IsVector{}); }); } else //2. n < size/2; there is not enough memory to parallel copying; doing parallel copying by n elements @@ -4181,10 +4283,10 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa for (auto __k = __n; __k < __size; __k += __n) { auto __end = ::std::min(__k + __n, __size); - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __k, __end, - [__first, __n, __is_vector](_DiffType __i, _DiffType __j) { - __brick_move<_ExecutionPolicy>{}(__first + __i, __first + __j, - __first + __i - __n, __is_vector); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __k, __end, + [__first, __n](_DiffType __i, _DiffType __j) { + __brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __first + __i, __first + __j, __first + __i - __n, _IsVector{}); }); } } @@ -4192,16 +4294,18 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forwa return __first + __size_res; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _BidirectionalIterator> -__pattern_shift_right(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, - typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n, - _IsVector __is_vector, _IsParallel is_parallel) +template +_BidirectionalIterator +__pattern_shift_right(_Tag __tag, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, + _BidirectionalIterator __last, + typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n) { + static_assert(__is_host_dispatch_tag_v<_Tag>); + using _ReverseIterator = typename ::std::reverse_iterator<_BidirectionalIterator>; - auto __res = oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), - _ReverseIterator(__last), _ReverseIterator(__first), __n, - __is_vector, is_parallel); + + auto __res = oneapi::dpl::__internal::__pattern_shift_left( + __tag, ::std::forward<_ExecutionPolicy>(__exec), _ReverseIterator(__last), _ReverseIterator(__first), __n); return __res.base(); } diff --git a/include/oneapi/dpl/pstl/execution_defs.h b/include/oneapi/dpl/pstl/execution_defs.h index d16a030b216..26287ccbf6e 100644 --- a/include/oneapi/dpl/pstl/execution_defs.h +++ b/include/oneapi/dpl/pstl/execution_defs.h @@ -31,88 +31,20 @@ inline namespace v1 // 2.4, Sequential execution policy class sequenced_policy { - public: - // For internal use only - static constexpr ::std::false_type - __allow_unsequenced() - { - return ::std::false_type{}; - } - static constexpr ::std::false_type - __allow_vector() - { - return ::std::false_type{}; - } - static constexpr ::std::false_type - __allow_parallel() - { - return ::std::false_type{}; - } }; // 2.5, Parallel execution policy class parallel_policy { - public: - // For internal use only - static constexpr ::std::false_type - __allow_unsequenced() - { - return ::std::false_type{}; - } - static constexpr ::std::false_type - __allow_vector() - { - return ::std::false_type{}; - } - static constexpr ::std::true_type - __allow_parallel() - { - return ::std::true_type{}; - } }; // 2.6, Parallel+Vector execution policy class parallel_unsequenced_policy { - public: - // For internal use only - static constexpr ::std::true_type - __allow_unsequenced() - { - return ::std::true_type{}; - } - static constexpr ::std::true_type - __allow_vector() - { - return ::std::true_type{}; - } - static constexpr ::std::true_type - __allow_parallel() - { - return ::std::true_type{}; - } }; class unsequenced_policy { - public: - // For internal use only - static constexpr ::std::true_type - __allow_unsequenced() - { - return ::std::true_type{}; - } - static constexpr ::std::true_type - __allow_vector() - { - return ::std::true_type{}; - } - static constexpr ::std::false_type - __allow_parallel() - { - return ::std::false_type{}; - } }; // 2.8, Execution policy objects @@ -180,14 +112,6 @@ template using __enable_if_execution_policy = ::std::enable_if_t>, _T>; -template -using __enable_if_host_execution_policy = - ::std::enable_if_t<__is_host_execution_policy<::std::decay_t<_ExecPolicy>>::value, _T>; - -template -using __enable_if_host_execution_policy_conditional = - ::std::enable_if_t<__is_host_execution_policy<::std::decay_t<_ExecPolicy>>::value && __condition, _T>; - template struct __ref_or_copy_impl { @@ -213,6 +137,22 @@ __check_size(...) -> typename ::std::iterator_traits<_It>::difference_type; template using __difference_t = ::std::make_signed_t(0))>; +//------------------------------------------------------------------------ +// backend tags +//------------------------------------------------------------------------ + +struct __serial_backend_tag +{ +}; + +struct __tbb_backend_tag +{ +}; + +struct __omp_backend_tag +{ +}; + } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/execution_impl.h b/include/oneapi/dpl/pstl/execution_impl.h index 70631a27114..133717bf68e 100644 --- a/include/oneapi/dpl/pstl/execution_impl.h +++ b/include/oneapi/dpl/pstl/execution_impl.h @@ -19,6 +19,7 @@ #include #include +#include "parallel_backend.h" #include "execution_defs.h" #include "iterator_defs.h" @@ -29,100 +30,134 @@ namespace dpl namespace __internal { -/* predicate */ +//------------------------------------------------------------------------ +// backend selector with tags +//------------------------------------------------------------------------ + +#if _ONEDPL_PAR_BACKEND_TBB +using __par_backend_tag = __tbb_backend_tag; +#elif _ONEDPL_PAR_BACKEND_OPENMP +using __par_backend_tag = __omp_backend_tag; +#elif _ONEDPL_PAR_BACKEND_SERIAL +using __par_backend_tag = __serial_backend_tag; +#else +# error "Parallel backend was not specified" +#endif + +template +struct __serial_tag +{ + using __is_vector = _IsVector; +}; -template -::std::false_type __lazy_and(_Tp, ::std::false_type) +template +struct __parallel_tag { - return ::std::false_type{}; -} + using __is_vector = _IsVector; + using __backend_tag = __par_backend_tag; +}; -template -inline _Tp -__lazy_and(_Tp __a, ::std::true_type) +struct __parallel_forward_tag { - return __a; + using __is_vector = ::std::false_type; + using __backend_tag = __par_backend_tag; +}; + +//---------------------------------------------------------- +// __select_backend (for the host policies) +//---------------------------------------------------------- + +template +using __parallel_policy_tag_selector_t = ::std::conditional_t< + __internal::__is_random_access_iterator_v<_IteratorTypes...>, __parallel_tag<_IsVector>, + ::std::conditional_t<__is_forward_iterator_v<_IteratorTypes...>, __parallel_forward_tag, __serial_tag<_IsVector>>>; + +template +__serial_tag +__select_backend(oneapi::dpl::execution::sequenced_policy, _IteratorTypes&&...) +{ + return {}; } -template -::std::true_type __lazy_or(_Tp, ::std::true_type) +template +__serial_tag<__internal::__is_random_access_iterator<_IteratorTypes...>> +__select_backend(oneapi::dpl::execution::unsequenced_policy, _IteratorTypes&&...) { - return ::std::true_type{}; + return {}; } -template -inline _Tp -__lazy_or(_Tp __a, ::std::false_type) +template +__parallel_policy_tag_selector_t +__select_backend(oneapi::dpl::execution::parallel_policy, _IteratorTypes&&...) { - return __a; + return {}; } -/* policy */ -template -struct __policy_traits +template +__parallel_policy_tag_selector_t<__internal::__is_random_access_iterator<_IteratorTypes...>, _IteratorTypes...> +__select_backend(oneapi::dpl::execution::parallel_unsequenced_policy, _IteratorTypes&&...) { -}; + return {}; +} -template <> -struct __policy_traits +//---------------------------------------------------------- +// __is_serial_tag, __is_serial_tag_v +//---------------------------------------------------------- + +template +struct __is_serial_tag : ::std::false_type { - typedef ::std::false_type __allow_parallel; - typedef ::std::false_type __allow_unsequenced; - typedef ::std::false_type __allow_vector; }; -template <> -struct __policy_traits +template +struct __is_serial_tag<__serial_tag<_IsVector>> : ::std::true_type { - typedef ::std::false_type __allow_parallel; - typedef ::std::true_type __allow_unsequenced; - typedef ::std::true_type __allow_vector; }; -template <> -struct __policy_traits +template +inline constexpr bool __is_serial_tag_v = __is_serial_tag<_Tag>::value; + +//---------------------------------------------------------- +// __is_parallel_forward_tag, __is_parallel_forward_tag_v +//---------------------------------------------------------- + +template +struct __is_parallel_forward_tag : ::std::false_type { - typedef ::std::true_type __allow_parallel; - typedef ::std::false_type __allow_unsequenced; - typedef ::std::false_type __allow_vector; }; template <> -struct __policy_traits +struct __is_parallel_forward_tag<__parallel_forward_tag> : ::std::true_type { - typedef ::std::true_type __allow_parallel; - typedef ::std::true_type __allow_unsequenced; - typedef ::std::true_type __allow_vector; }; -template -using __allow_vector = typename __internal::__policy_traits<::std::decay_t<_ExecutionPolicy>>::__allow_vector; - -template -using __allow_unsequenced = typename __internal::__policy_traits<::std::decay_t<_ExecutionPolicy>>::__allow_unsequenced; +template +inline constexpr bool __is_parallel_forward_tag_v = __is_parallel_forward_tag<_Tag>::value; -template -using __allow_parallel = typename __internal::__policy_traits<::std::decay_t<_ExecutionPolicy>>::__allow_parallel; +//---------------------------------------------------------- +// __is_parallel_tag, __is_parallel_tag_v +//---------------------------------------------------------- -template -auto -__is_vectorization_preferred(_ExecutionPolicy& __exec) - -> decltype(__internal::__lazy_and(__exec.__allow_vector(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>())) +template +struct __is_parallel_tag : ::std::false_type { - return __internal::__lazy_and(__exec.__allow_vector(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>()); -} +}; -template -auto -__is_parallelization_preferred(_ExecutionPolicy& __exec) - -> decltype(__internal::__lazy_and(__exec.__allow_parallel(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>())) +template +struct __is_parallel_tag<__parallel_tag<_IsVector>> : ::std::true_type { - return __internal::__lazy_and(__exec.__allow_parallel(), - __internal::__is_random_access_iterator_t<_IteratorTypes...>()); -} +}; + +template +inline constexpr bool __is_parallel_tag_v = __is_parallel_tag<_Tag>::value; + +//---------------------------------------------------------- +// __is_host_dispatch_tag_v +//---------------------------------------------------------- + +template +inline constexpr bool __is_host_dispatch_tag_v = + __is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag> || __is_parallel_tag_v<_Tag>; } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop.h index 4a61dd7c09b..5d8802083d3 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop.h @@ -56,6 +56,9 @@ template void for_loop(_ExecutionPolicy&& __exec, type_identity_t<_Ip> __start, _Ip __finish, _Rest&&... __rest) { + static_assert(oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, + "for_loop is implemented for the host policies only"); + oneapi::dpl::__internal::__for_loop_repack(::std::forward<_ExecutionPolicy>(__exec), __start, __finish, oneapi::dpl::__internal::__single_stride_type{}, ::std::forward_as_tuple(::std::forward<_Rest>(__rest)...)); @@ -65,6 +68,9 @@ template __start, _Ip __finish, _Sp __stride, _Rest&&... __rest) { + static_assert(oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, + "for_loop_strided is implemented for the host policies only"); + oneapi::dpl::__internal::__for_loop_repack(::std::forward<_ExecutionPolicy>(__exec), __start, __finish, __stride, ::std::forward_as_tuple(::std::forward<_Rest>(__rest)...)); } @@ -73,6 +79,9 @@ template >::value, + "for_loop_n is implemented for the host policies only"); + oneapi::dpl::__internal::__for_loop_repack_n(::std::forward<_ExecutionPolicy>(__exec), __start, __n, oneapi::dpl::__internal::__single_stride_type{}, ::std::forward_as_tuple(::std::forward<_Rest>(__rest)...)); @@ -82,6 +91,9 @@ template >::value, + "for_loop_n_strided is implemented for the host policies only"); + oneapi::dpl::__internal::__for_loop_repack_n(::std::forward<_ExecutionPolicy>(__exec), __start, __n, __stride, ::std::forward_as_tuple(::std::forward<_Rest>(__rest)...)); } diff --git a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h index d7f738036c3..47769c4645d 100644 --- a/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h +++ b/include/oneapi/dpl/pstl/experimental/internal/for_loop_impl.h @@ -65,7 +65,7 @@ struct __difference<_Ip, ::std::enable_if_t<::std::is_integral_v<_Ip>>> template struct __difference<_Ip, ::std::enable_if_t>> { - using __type = typename oneapi::dpl::__internal::__iterator_traits<_Ip>::difference_type; + using __type = typename ::std::iterator_traits<_Ip>::difference_type; }; // This type is used as a stride value when it's known that stride == 1 at compile time(the case of for_loop and for_loop_n). @@ -232,9 +232,9 @@ __pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function } template -::std::enable_if_t<::std::is_same_v::iterator_category, - ::std::bidirectional_iterator_tag>, - _IndexType> +::std::enable_if_t< + ::std::is_same_v::iterator_category, ::std::bidirectional_iterator_tag>, + _IndexType> __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pack& __pack, _IndexType) noexcept { _IndexType __ordinal_position = 0; @@ -269,11 +269,10 @@ __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pa } template -::std::enable_if_t<::std::is_same_v::iterator_category, - ::std::forward_iterator_tag> || - ::std::is_same_v::iterator_category, - ::std::input_iterator_tag>, - _IndexType> +::std::enable_if_t< + ::std::is_same_v::iterator_category, ::std::forward_iterator_tag> || + ::std::is_same_v::iterator_category, ::std::input_iterator_tag>, + _IndexType> __execute_loop_strided(_Ip __first, _Ip __last, _Function __f, _Sp __stride, _Pack& __pack, _IndexType) noexcept { _IndexType __ordinal_position = 0; @@ -398,26 +397,27 @@ __pattern_for_loop_n(_ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Functio // Create an identity pack object, operations are done on copies of it. const __pack_type __identity{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; + using __backend_tag = typename oneapi::dpl::__internal::__parallel_tag<_IsVector>::__backend_tag; oneapi::dpl::__internal::__except_handler([&]() { - return __par_backend::__parallel_reduce(::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, - [__is_vector, __first, __f](_Size __i, _Size __j, __pack_type __value) { - const auto __subseq_start = __first + __i; - const auto __length = __j - __i; - - oneapi::dpl::__internal::__brick_walk1( - __length, - [&__value, __f, __i, __subseq_start](_Size __idx) { - __value.__apply_func(__f, __subseq_start + __idx, - __i + __idx); - }, - __is_vector); - - return __value; - }, - [](__pack_type __lhs, const __pack_type& __rhs) { - __lhs.__combine(__rhs); - return __lhs; - }) + return __par_backend::__parallel_reduce( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, + [__is_vector, __first, __f](_Size __i, _Size __j, __pack_type __value) { + const auto __subseq_start = __first + __i; + const auto __length = __j - __i; + + oneapi::dpl::__internal::__brick_walk1( + __length, + [&__value, __f, __i, __subseq_start](_Size __idx) { + __value.__apply_func(__f, __subseq_start + __idx, __i + __idx); + }, + __is_vector); + + return __value; + }, + [](__pack_type __lhs, const __pack_type& __rhs) { + __lhs.__combine(__rhs); + return __lhs; + }) .__finalize(__n); }); } @@ -433,9 +433,10 @@ __pattern_for_loop_n(_ExecutionPolicy&& __exec, _Ip __first, _Size __n, _Functio // Create an identity pack object, operations are done on copies of it. const __pack_type __identity{__reduction_pack_tag(), ::std::forward<_Rest>(__rest)...}; + using __backend_tag = typename oneapi::dpl::__internal::__parallel_tag<_IsVector>::__backend_tag; oneapi::dpl::__internal::__except_handler([&]() { return __par_backend::__parallel_reduce( - ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), _Size(0), __n, __identity, [__is_vector, __first, __f, __stride](_Size __i, _Size __j, __pack_type __value) { const auto __subseq_start = __first + __i * __stride; const auto __length = __j - __i; @@ -472,48 +473,25 @@ __pattern_for_loop(_ExecutionPolicy&& __exec, _Ip __first, _Ip __last, _Function // Helper structure to split code functions for integral and iterator types so the return // value can be successfully deduced. -template -struct __use_par_vec_helper; - template -struct __use_par_vec_helper<_Ip, ::std::enable_if_t<::std::is_integral_v<_Ip>>> +struct __use_par_vec_helper { - template - static constexpr auto - __use_vector(_ExecutionPolicy&& __exec) -> decltype(__exec.__allow_vector()) - { - return __exec.__allow_vector(); - } - - template - static constexpr auto - __use_parallel(_ExecutionPolicy&& __exec) -> decltype(__exec.__allow_parallel()) - { - return __exec.__allow_parallel(); - } -}; + using __it_type = std::conditional_t, _Ip*, _Ip>; -template -struct __use_par_vec_helper<_Ip, ::std::enable_if_t>> -{ template static constexpr auto __use_vector(_ExecutionPolicy&& __exec) - -> decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec))) { - return oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec)); + using __tag_type = decltype(oneapi::dpl::__internal::__select_backend(__exec, std::declval<__it_type>())); + return typename __tag_type::__is_vector{}; } template static constexpr auto __use_parallel(_ExecutionPolicy&& __exec) - -> decltype(oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec))) { - return oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _Ip>( - ::std::forward<_ExecutionPolicy>(__exec)); + using __tag_type = decltype(oneapi::dpl::__internal::__select_backend(__exec, std::declval<__it_type>())); + return oneapi::dpl::__internal::__is_parallel_tag<__tag_type>{}; } }; @@ -521,7 +499,6 @@ struct __use_par_vec_helper<_Ip, ::std::enable_if_t>> template auto __use_vectorization(_ExecutionPolicy&& __exec) - -> decltype(__use_par_vec_helper<_Ip>::__use_vector(::std::forward<_ExecutionPolicy>(__exec))) { return __use_par_vec_helper<_Ip>::__use_vector(::std::forward<_ExecutionPolicy>(__exec)); } @@ -529,7 +506,6 @@ __use_vectorization(_ExecutionPolicy&& __exec) template auto __use_parallelization(_ExecutionPolicy&& __exec) - -> decltype(__use_par_vec_helper<_Ip>::__use_parallel(::std::forward<_ExecutionPolicy>(__exec))) { return __use_par_vec_helper<_Ip>::__use_parallel(::std::forward<_ExecutionPolicy>(__exec)); } diff --git a/include/oneapi/dpl/pstl/glue_algorithm_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_impl.h index 952087c68ad..0d17726cc24 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_impl.h @@ -43,10 +43,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_any_of( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_any_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } // [alg.all_of] @@ -75,20 +75,20 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> for_each(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f) { - oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __f, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - __exec.__allow_parallel()); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __f); } template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> for_each_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f) { - return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __f, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_walk1_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __n, __f); } // [alg.find] @@ -97,10 +97,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_find_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_find_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } template @@ -129,12 +129,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_find_end( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __s_first); + + return oneapi::dpl::__internal::__pattern_find_end(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __s_first, __s_last, __pred); } template @@ -152,12 +150,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_find_first_of( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __s_first); + + return oneapi::dpl::__internal::__pattern_find_first_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __s_first, __s_last, __pred); } template @@ -175,22 +171,23 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last) { typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; - return oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, ::std::equal_to<_ValueType>(), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__first_semantic()); + + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_adjacent_find(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, ::std::equal_to<_ValueType>(), + oneapi::dpl::__internal::__first_semantic()); } template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__first_semantic()); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_adjacent_find(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred, + oneapi::dpl::__internal::__first_semantic()); } // [alg.count] @@ -203,12 +200,12 @@ oneapi::dpl::__internal::__enable_if_execution_policy< _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> count(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + return oneapi::dpl::__internal::__pattern_count( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__equal_value>( - __value), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + __value)); } template @@ -216,10 +213,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy< _ExecutionPolicy, typename ::std::iterator_traits<_ForwardIterator>::difference_type> count_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_count( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_count(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } // [alg.search] @@ -229,12 +226,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_search( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __s_first); + + return oneapi::dpl::__internal::__pattern_search(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __s_first, __s_last, __pred); } template @@ -251,10 +246,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_search_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __count, __value, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_search_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __count, __value, __pred); } template @@ -272,11 +267,11 @@ template copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result) { + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__brick_copy{}); } template @@ -285,11 +280,11 @@ copy_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Size __n, _Forward { using _DecayedExecutionPolicy = ::std::decay_t<_ExecutionPolicy>; + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + return oneapi::dpl::__internal::__pattern_walk2_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__brick_copy_n{}); } template @@ -297,12 +292,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _Predicate __pred) { - return oneapi::dpl::__internal::__pattern_copy_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_copy_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __result, __pred); } // [alg.swap] @@ -314,16 +307,14 @@ swap_ranges(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardItera { typedef typename ::std::iterator_traits<_ForwardIterator1>::reference _ReferenceType1; typedef typename ::std::iterator_traits<_ForwardIterator2>::reference _ReferenceType2; - return oneapi::dpl::__internal::__pattern_swap( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, - [](_ReferenceType1 __x, _ReferenceType2 __y) { - using ::std::swap; - swap(__x, __y); - }, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_swap(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, [](_ReferenceType1 __x, _ReferenceType2 __y) { + using ::std::swap; + swap(__x, __y); + }); } // [alg.transform] @@ -333,12 +324,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryOperation __op) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - __exec.__allow_parallel()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__transform_functor<_UnaryOperation>{::std::move(__op)}); } // we can't use non-const __op here @@ -348,12 +338,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator __result, _BinaryOperation __op) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + return oneapi::dpl::__internal::__pattern_walk3( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, - oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - __exec.__allow_parallel()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, + oneapi::dpl::__internal::__transform_functor<_BinaryOperation>(::std::move(__op))); } // [alg.transform_if] @@ -364,14 +353,12 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryOperation __op, _UnaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + return oneapi::dpl::__internal::__pattern_walk2_transform_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__transform_if_unary_functor<_UnaryOperation, _UnaryPredicate>(::std::move(__op), - ::std::move(__pred)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + ::std::move(__pred))); } template (__exec), __first1, __last1, __first2, __result, - oneapi::dpl::__internal::__transform_if_binary_functor<_BinaryOperation, _BinaryPredicate>(::std::move(__op), - ::std::move(__pred)), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator3>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator3>(__exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result, + oneapi::dpl::__internal::__transform_if_binary_functor<_BinaryOperation, _BinaryPredicate>( + ::std::move(__op), ::std::move(__pred))); } // [alg.replace] @@ -397,13 +382,13 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred, const _Tp& __new_value) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, oneapi::dpl::__internal::__replace_functor< oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, - oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred)); } template @@ -423,18 +408,16 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward replace_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _UnaryPredicate __pred, const _Tp& __new_value) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, oneapi::dpl::__internal::__replace_copy_functor< oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, ::std::conditional_t, _UnaryPredicate, oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>>( - __new_value, __pred), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __new_value, __pred)); } template @@ -455,10 +438,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { - oneapi::dpl::__internal::__pattern_fill( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __value, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_fill(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __value); } template @@ -468,10 +451,10 @@ fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, const if (__count <= 0) return __first; - return oneapi::dpl::__internal::__pattern_fill_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __count, __value, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_fill_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __count, __value); } // [alg.generate] @@ -479,10 +462,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g) { - oneapi::dpl::__internal::__pattern_generate( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __g, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_generate(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __g); } template @@ -492,10 +475,10 @@ generate_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, _ if (__count <= 0) return __first; - return oneapi::dpl::__internal::__pattern_generate_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __count, __g, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_generate_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __count, __g); } // [alg.remove] @@ -526,10 +509,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> remove_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_remove_if( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_remove_if(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template @@ -548,10 +531,10 @@ template unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_unique( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_unique(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __pred); } template @@ -567,12 +550,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_unique_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_unique_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __pred); } template @@ -589,10 +570,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last) { - oneapi::dpl::__internal::__pattern_reverse( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_reverse(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last); } template @@ -600,12 +581,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, _ForwardIterator __d_first) { - return oneapi::dpl::__internal::__pattern_reverse_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator, - _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __d_first); + + return oneapi::dpl::__internal::__pattern_reverse_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __d_first); } // [alg.rotate] @@ -614,10 +593,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> rotate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last) { - return oneapi::dpl::__internal::__pattern_rotate( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_rotate(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __middle, __last); } template @@ -625,12 +604,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __middle, _ForwardIterator1 __last, _ForwardIterator2 __result) { - return oneapi::dpl::__internal::__pattern_rotate_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __result, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_rotate_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __middle, __last, __result); } // [alg.partitions] @@ -639,20 +616,20 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_partitioned(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_is_partitioned( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_is_partitioned(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> partition(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_partition( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template @@ -660,10 +637,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Bidirec stable_partition(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_stable_partition( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_stable_partition(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __pred); } template (__exec), __first, __last, __out_true, __out_false, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, - _ForwardIterator2>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, _ForwardIterator1, - _ForwardIterator2>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __out_true, __out_false); + + return oneapi::dpl::__internal::__pattern_partition_copy(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __out_true, __out_false, __pred); } // [alg.sort] @@ -687,12 +662,12 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + typedef typename ::std::iterator_traits<_RandomAccessIterator>::value_type _InputType; - oneapi::dpl::__internal::__pattern_sort( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - typename ::std::is_move_constructible<_InputType>::type()); + + oneapi::dpl::__internal::__pattern_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __comp, typename ::std::is_move_constructible<_InputType>::type()); } template @@ -709,10 +684,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_stable_sort( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_stable_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __comp); } template @@ -731,12 +706,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> sort_by_key(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __keys_first, _RandomAccessIterator1 __keys_last, _RandomAccessIterator2 __values_first, _Compare __comp) { - oneapi::dpl::__internal::__pattern_sort_by_key( - ::std::forward<_ExecutionPolicy>(__exec), __keys_first, __keys_last, __values_first, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator1, - _RandomAccessIterator2>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator1, - _RandomAccessIterator2>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __keys_first, __values_first); + + oneapi::dpl::__internal::__pattern_sort_by_key(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __keys_first, __keys_last, __values_first, __comp); } template @@ -756,12 +729,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__pattern_mismatch( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __pred, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_mismatch(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __pred); } template @@ -802,10 +773,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __p) { - return oneapi::dpl::__internal::__pattern_equal( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __p, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, __p); } template @@ -821,10 +792,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __p) { - return oneapi::dpl::__internal::__pattern_equal( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __p, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, __last2, __p); } template @@ -843,11 +814,11 @@ move(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __l { using _DecayedExecutionPolicy = ::std::decay_t<_ExecutionPolicy>; + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __d_first); + return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, - oneapi::dpl::__internal::__brick_move<_DecayedExecutionPolicy>{}, - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, + oneapi::dpl::__internal::__brick_move{}); } // [partial.sort] @@ -857,10 +828,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_partial_sort( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_partial_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __middle, __last, __comp); } template @@ -879,12 +850,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomA partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __d_first); + return oneapi::dpl::__internal::__pattern_partial_sort_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __d_last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator, - _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator, - _RandomAccessIterator>(__exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __d_last, __comp); } template @@ -901,12 +870,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + const _ForwardIterator __res = oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__first_semantic()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), oneapi::dpl::__internal::__first_semantic()); return __res == __last ? __last : oneapi::dpl::__internal::__pstl_next(__res); } @@ -922,12 +890,12 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__or_semantic()) == __last; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_adjacent_find(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, + oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), + oneapi::dpl::__internal::__or_semantic()) == __last; } template @@ -945,12 +913,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __d_first, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_merge( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __d_first); + + return oneapi::dpl::__internal::__pattern_merge(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, + __last1, __first2, __last2, __d_first, __comp); } template @@ -967,10 +933,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_inplace_merge( - ::std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_inplace_merge(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __middle, __last, __comp); } template @@ -989,12 +955,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_includes( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_includes(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __comp); } template @@ -1014,12 +978,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_set_union( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + + return oneapi::dpl::__internal::__pattern_set_union(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __result, __comp); } template @@ -1039,12 +1001,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_set_intersection( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + + return oneapi::dpl::__internal::__pattern_set_intersection(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __result, __comp); } template @@ -1064,12 +1024,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_set_difference( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + + return oneapi::dpl::__internal::__pattern_set_difference(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __last2, __result, __comp); } template @@ -1090,12 +1048,11 @@ set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2, __result); + return oneapi::dpl::__internal::__pattern_set_symmetric_difference( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2, - _ForwardIterator>(__exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, + __comp); } template @@ -1112,10 +1069,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator> is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_is_heap_until( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_is_heap_until(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __comp); } template @@ -1130,10 +1087,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_is_heap( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_is_heap(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, __comp); } template @@ -1150,10 +1107,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator> min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_min_element( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_min_element(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __comp); } template @@ -1185,10 +1142,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, ::std::pair<_ForwardIterator, _ForwardIterator>> minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { - return oneapi::dpl::__internal::__pattern_minmax_element( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_minmax_element(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __comp); } template @@ -1206,10 +1163,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) { - oneapi::dpl::__internal::__pattern_nth_element( - ::std::forward<_ExecutionPolicy>(__exec), __first, __nth, __last, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _RandomAccessIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + oneapi::dpl::__internal::__pattern_nth_element(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __nth, __last, __comp); } template @@ -1228,12 +1185,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + return oneapi::dpl::__internal::__pattern_lexicographical_compare( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp); } template @@ -1252,10 +1207,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward shift_left(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, typename ::std::iterator_traits<_ForwardIterator>::difference_type __n) { - return oneapi::dpl::__internal::__pattern_shift_left( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __n, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_shift_left(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __n); } // [shift.right] @@ -1265,10 +1220,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Bidirec shift_right(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, typename ::std::iterator_traits<_BidirectionalIterator>::difference_type __n) { - return oneapi::dpl::__internal::__pattern_shift_right( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __n, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _BidirectionalIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_shift_right(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __n); } } // namespace dpl diff --git a/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h b/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h index bbe5cc72ac8..4e51d0e4e2a 100644 --- a/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h +++ b/include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h @@ -39,7 +39,9 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> any_of(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_any_of(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_any_of(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __pred); } @@ -70,7 +72,9 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> for_each(_ExecutionPolicy&& __exec, _Range&& __rng, _Function __f) { - oneapi::dpl::__internal::__ranges::__pattern_walk_n(::std::forward<_ExecutionPolicy>(__exec), __f, + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + oneapi::dpl::__internal::__ranges::__pattern_walk_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __f, views::all(::std::forward<_Range>(__rng))); } @@ -80,7 +84,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> find_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_find_if(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_find_if(__dispatch_tag, + ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __pred); } @@ -111,8 +118,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range1>> find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_find_end( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __pred); } @@ -132,8 +141,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range1>> find_first_of(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_find_first_of( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __pred); } @@ -152,9 +163,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> adjacent_find(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + return oneapi::dpl::__internal::__ranges::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __pred, - oneapi::dpl::__internal::__first_semantic()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), + __pred, oneapi::dpl::__internal::__first_semantic()); } template @@ -172,7 +185,9 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> count_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_count(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_count(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __pred); } @@ -193,8 +208,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range1>> search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_search( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __pred); } @@ -211,9 +228,11 @@ template > search_n(_ExecutionPolicy&& __exec, _Range&& __rng, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_search_n(::std::forward<_ExecutionPolicy>(__exec), - views::all_read(::std::forward<_Range>(__rng)), - __count, __value, __pred); + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_search_n( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), + __count, __value, __pred); } template @@ -230,8 +249,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> copy(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result) { + auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng, __result); + oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__internal::__brick_copy{}, views::all_read(::std::forward<_Range1>(__rng)), views::all_write(::std::forward<_Range2>(__result))); } @@ -240,8 +262,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range2>> copy_if(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _Predicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng, __result); + return oneapi::dpl::__internal::__ranges::__pattern_copy_if( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng)), views::all_write(::std::forward<_Range2>(__result)), __pred, oneapi::dpl::__internal::__pstl_assign()); } @@ -252,11 +276,13 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range1>> swap_ranges(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + using _ReferenceType1 = oneapi::dpl::__internal::__value_t<_Range1>&; using _ReferenceType2 = oneapi::dpl::__internal::__value_t<_Range2>&; return oneapi::dpl::__internal::__ranges::__pattern_swap( - ::std::forward<_ExecutionPolicy>(__exec), views::all(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all(::std::forward<_Range1>(__rng1)), views::all(::std::forward<_Range2>(__rng2)), [](_ReferenceType1 __x, _ReferenceType2 __y) { using ::std::swap; swap(__x, __y); @@ -269,8 +295,10 @@ template transform(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _UnaryOperation __op) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng, __result); + oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), [__op](auto x, auto& z) { z = __op(x); }, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), [__op](auto x, auto& z) { z = __op(x); }, views::all_read(::std::forward<_Range1>(__rng)), views::all_write(::std::forward<_Range2>(__result))); } @@ -278,8 +306,10 @@ template transform(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __result, _BinaryOperation __op) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2, __result); + oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), [__op](auto x, auto y, auto& z) { z = __op(x, y); }, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), [__op](auto x, auto y, auto& z) { z = __op(x, y); }, views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), views::all_write(::std::forward<_Range3>(__result))); } @@ -290,8 +320,10 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> remove_if(_ExecutionPolicy&& __exec, _Range&& __rng, _UnaryPredicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_remove_if(::std::forward<_ExecutionPolicy>(__exec), - views::all(::std::forward<_Range>(__rng)), __pred); + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_remove_if( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all(::std::forward<_Range>(__rng)), __pred); } template @@ -332,7 +364,9 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> unique(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __pred) { - return oneapi::dpl::__internal::__ranges::__pattern_unique(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_unique(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all(::std::forward<_Range>(__rng)), __pred); } @@ -349,8 +383,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range2>> unique_copy(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _BinaryPredicate __pred) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng, __result); + return oneapi::dpl::__internal::__ranges::__pattern_unique_copy( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng)), views::all_write(::std::forward<_Range2>(__result)), __pred, oneapi::dpl::__internal::__pstl_assign()); } @@ -406,8 +442,10 @@ template replace_if(_ExecutionPolicy&& __exec, _Range&& __rng, _UnaryPredicate __pred, const _Tp& __new_value) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__internal::__replace_functor< oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, _UnaryPredicate>>(__new_value, __pred), @@ -431,9 +469,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, replace_copy_if(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _UnaryPredicate __pred, const _Tp& __new_value) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng, __result); + auto __src = views::all_read(::std::forward<_Range1>(__rng)); oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__internal::__replace_copy_functor< oneapi::dpl::__internal::__ref_or_copy<_ExecutionPolicy, const _Tp>, ::std::conditional_t, @@ -463,7 +503,9 @@ template sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) { - oneapi::dpl::__internal::__ranges::__pattern_sort(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + oneapi::dpl::__internal::__ranges::__pattern_sort(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all(::std::forward<_Range>(__rng)), __comp, __proj); } @@ -498,10 +540,12 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> is_sorted_until(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + auto __view = views::all_read(::std::forward<_Range>(__rng)); const auto __res = oneapi::dpl::__internal::__ranges::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __view, oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), - oneapi::dpl::__internal::__first_semantic()); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __view, + oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), oneapi::dpl::__internal::__first_semantic()); return __res == __view.size() ? __res : __res + 1; } @@ -518,9 +562,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool> is_sorted(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + auto __view = views::all_read(::std::forward<_Range>(__rng)); return oneapi::dpl::__internal::__ranges::__pattern_adjacent_find( - ::std::forward<_ExecutionPolicy>(__exec), __view, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __view, oneapi::dpl::__internal::__reorder_pred<_Compare>(__comp), oneapi::dpl::__internal::__or_semantic()) == __view.size(); } @@ -539,7 +585,9 @@ template equal(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryPredicate __p) { - return oneapi::dpl::__internal::__ranges::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + + return oneapi::dpl::__internal::__ranges::__pattern_equal(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __p); } @@ -558,10 +606,13 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy> move(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2) { + auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + using _DecayedExecutionPolicy = ::std::decay_t<_ExecutionPolicy>; oneapi::dpl::__internal::__ranges::__pattern_walk_n( - ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__internal::__brick_move<_DecayedExecutionPolicy>{}, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__internal::__brick_move{}, views::all_read(::std::forward<_Range1>(__rng1)), views::all_write(::std::forward<_Range2>(__rng2))); } @@ -572,8 +623,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range3>> merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2, __rng3); + return oneapi::dpl::__internal::__ranges::__pattern_merge( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), views::all_write(::std::forward<_Range3>(__rng3)), __comp); } @@ -593,8 +646,11 @@ template oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, oneapi::dpl::__internal::__difference_t<_Range>> min_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + return oneapi::dpl::__internal::__ranges::__pattern_min_element( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __comp); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), + __comp); } template @@ -628,8 +684,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy< ::std::pair, oneapi::dpl::__internal::__difference_t<_Range>>> minmax_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + return oneapi::dpl::__internal::__ranges::__pattern_minmax_element( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), __comp); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), + __comp); } template @@ -649,8 +708,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op) { + const auto __dispatch_tag = + oneapi::dpl::__ranges::__select_backend(__exec, __keys, __values, __out_keys, __out_values); + return oneapi::dpl::__internal::__ranges::__pattern_reduce_by_segment( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__keys)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__keys)), views::all_read(::std::forward<_Range2>(__values)), views::all_write(::std::forward<_Range3>(__out_keys)), views::all_write(::std::forward<_Range4>(__out_values)), __binary_pred, __binary_op); } diff --git a/include/oneapi/dpl/pstl/glue_memory_impl.h b/include/oneapi/dpl/pstl/glue_memory_impl.h index 082856131e7..fac93889dfb 100644 --- a/include/oneapi/dpl/pstl/glue_memory_impl.h +++ b/include/oneapi/dpl/pstl/glue_memory_impl.h @@ -45,25 +45,19 @@ uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__brick_copy{}); } else { return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}); } } @@ -75,25 +69,19 @@ uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { return oneapi::dpl::__internal::__pattern_walk2_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__brick_copy_n{}); } else { return oneapi::dpl::__internal::__pattern_walk2_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__op_uninitialized_copy<_DecayedExecutionPolicy>{}); } } @@ -107,25 +95,19 @@ uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIter typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { return oneapi::dpl::__internal::__pattern_walk2_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__brick_copy<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__brick_copy{}); } else { return oneapi::dpl::__internal::__pattern_walk2( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, - oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}); } } @@ -137,25 +119,19 @@ uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType2; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _InputIterator, _ForwardIterator>( - __exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); if constexpr (::std::is_trivial_v<_ValueType1> && ::std::is_trivial_v<_ValueType2>) { return oneapi::dpl::__internal::__pattern_walk2_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__brick_copy_n<_DecayedExecutionPolicy>{}, __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__brick_copy_n{}); } else { return oneapi::dpl::__internal::__pattern_walk2_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, - oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, __result, + oneapi::dpl::__internal::__op_uninitialized_move<_DecayedExecutionPolicy>{}); } } @@ -168,24 +144,20 @@ uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Forward typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); if constexpr (::std::is_arithmetic_v<_ValueType>) { oneapi::dpl::__internal::__pattern_walk_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__brick_fill{ + _ValueType(__value)}); } else { oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}); } } @@ -196,27 +168,46 @@ uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); if constexpr (::std::is_arithmetic_v<_ValueType>) { return oneapi::dpl::__internal::__pattern_walk_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__brick_fill_n<_ValueType, _DecayedExecutionPolicy>{_ValueType(__value)}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__brick_fill_n{ + _ValueType(__value)}); } else { return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__op_uninitialized_fill<_Tp, _DecayedExecutionPolicy>{__value}); } } +#if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) + +const oneapi::dpl::execution::parallel_policy& +get_unvectorized_policy(const oneapi::dpl::execution::parallel_unsequenced_policy&) +{ + return oneapi::dpl::execution::par; +} + +const oneapi::dpl::execution::sequenced_policy& +get_unvectorized_policy(const oneapi::dpl::execution::unsequenced_policy&) +{ + return oneapi::dpl::execution::seq; +} + +template +const _ExecutionPolicy& +get_unvectorized_policy(const _ExecutionPolicy& __exec) +{ + return __exec; +} + +#endif // (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) + // [specialized.destroy] template @@ -226,25 +217,17 @@ destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - using _is_vector_type = + if constexpr (!::std::is_trivially_destructible_v<_ValueType>) + { + const auto __dispatch_tag = #if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) - ::std::conditional_t< - oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, - ::std::false_type, - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>( - __exec))>; + oneapi::dpl::__internal::__select_backend(get_unvectorized_policy(__exec), __first); #else - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); -#endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN - constexpr _is_vector_type __is_vector; + oneapi::dpl::__internal::__select_backend(__exec, __first); +#endif - if constexpr (!::std::is_trivially_destructible_v<_ValueType>) - { - oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [](_ReferenceType __val) { __val.~_ValueType(); }, __is_vector, __is_parallel); + oneapi::dpl::__internal::__pattern_walk1(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, + __last, [](_ReferenceType __val) { __val.~_ValueType(); }); } } @@ -255,29 +238,22 @@ destroy_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n) typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef typename ::std::iterator_traits<_ForwardIterator>::reference _ReferenceType; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - using _is_vector_type = -#if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) - ::std::conditional_t< - oneapi::dpl::__internal::__is_host_execution_policy<::std::decay_t<_ExecutionPolicy>>::value, - ::std::false_type, - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>( - __exec))>; -#else - decltype(oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); -#endif // _PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN - constexpr _is_vector_type __is_vector; - if constexpr (::std::is_trivially_destructible_v<_ValueType>) { return oneapi::dpl::__internal::__pstl_next(__first, __n); } else { - return oneapi::dpl::__internal::__pattern_walk1_n(::std::forward<_ExecutionPolicy>(__exec), __first, __n, - [](_ReferenceType __val) { __val.~_ValueType(); }, - __is_vector, __is_parallel); + const auto __dispatch_tag = +#if (_PSTL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN || _ONEDPL_ICPX_OMP_SIMD_DESTROY_WINDOWS_BROKEN) + oneapi::dpl::__internal::__select_backend(get_unvectorized_policy(__exec), __first); +#else + oneapi::dpl::__internal::__select_backend(__exec, __first); +#endif + + return oneapi::dpl::__internal::__pattern_walk1_n(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __n, + [](_ReferenceType __val) { __val.~_ValueType(); }); } } @@ -290,17 +266,13 @@ uninitialized_default_construct(_ExecutionPolicy&& __exec, _ForwardIterator __fi typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - if constexpr (!::std::is_trivial_v<_ValueType>) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}); } } @@ -311,21 +283,17 @@ uninitialized_default_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __ typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - if constexpr (::std::is_trivial_v<_ValueType>) { return oneapi::dpl::__internal::__pstl_next(__first, __n); } else { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__op_uninitialized_default_construct<_DecayedExecutionPolicy>{}); } } @@ -338,24 +306,20 @@ uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __firs typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); if constexpr (::std::is_trivial_v<_ValueType>) { oneapi::dpl::__internal::__pattern_walk_brick( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__brick_fill<_ValueType, _DecayedExecutionPolicy>{_ValueType()}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__brick_fill{ + _ValueType()}); } else { oneapi::dpl::__internal::__pattern_walk1( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}); } } @@ -366,24 +330,20 @@ uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __fi typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; typedef ::std::decay_t<_ExecutionPolicy> _DecayedExecutionPolicy; - const auto __is_parallel = - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); - const auto __is_vector = - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec); + auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); if constexpr (::std::is_trivial_v<_ValueType>) { return oneapi::dpl::__internal::__pattern_walk_brick_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__brick_fill_n<_ValueType, _DecayedExecutionPolicy>{_ValueType()}, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__brick_fill_n{ + _ValueType()}); } else { return oneapi::dpl::__internal::__pattern_walk1_n( - ::std::forward<_ExecutionPolicy>(__exec), __first, __n, - oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}, __is_vector, - __is_parallel); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __n, + oneapi::dpl::__internal::__op_uninitialized_value_construct<_DecayedExecutionPolicy>{}); } } diff --git a/include/oneapi/dpl/pstl/glue_numeric_impl.h b/include/oneapi/dpl/pstl/glue_numeric_impl.h index f2564db3132..17ed09d0ca4 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_impl.h +++ b/include/oneapi/dpl/pstl/glue_numeric_impl.h @@ -70,13 +70,12 @@ transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forward _ForwardIterator2 __first2, _Tp __init) { typedef typename ::std::iterator_traits<_ForwardIterator1>::value_type _InputType; + + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + return oneapi::dpl::__internal::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, ::std::plus<_InputType>(), - ::std::multiplies<_InputType>(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, + ::std::plus<_InputType>(), ::std::multiplies<_InputType>()); } template transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { - return oneapi::dpl::__internal::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __init, __binary_op1, __binary_op2, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first1, __first2); + + return oneapi::dpl::__internal::__pattern_transform_reduce(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first1, __last1, __first2, __init, __binary_op1, + __binary_op2); } template @@ -98,10 +96,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { - return oneapi::dpl::__internal::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op, __unary_op, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator>(__exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first); + + return oneapi::dpl::__internal::__pattern_transform_reduce(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __init, __binary_op, __unary_op); } // [exclusive.scan] @@ -225,13 +223,11 @@ transform_exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { - return oneapi::dpl::__internal::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __init, __binary_op, - /*inclusive=*/::std::false_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __unary_op, __init, __binary_op, + /*inclusive=*/::std::false_type()); } // [transform.inclusive.scan] @@ -243,13 +239,11 @@ transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ _ForwardIterator2 __result, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _Tp __init) { - return oneapi::dpl::__internal::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, __init, __binary_op, - /*inclusive=*/::std::true_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __unary_op, __init, __binary_op, + /*inclusive=*/::std::true_type()); } template (__exec), __first, __last, __result, __unary_op, __binary_op, - /*inclusive=*/::std::true_type(), - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __result); + + return oneapi::dpl::__internal::__pattern_transform_scan(__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), + __first, __last, __result, __unary_op, __binary_op, + /*inclusive=*/::std::true_type()); } // [adjacent.difference] @@ -274,16 +266,13 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Forward adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first, _BinaryOperation __op) { - if (__first == __last) return __d_first; + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(__exec, __first, __d_first); + return oneapi::dpl::__internal::__pattern_adjacent_difference( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __op, - oneapi::dpl::__internal::__is_vectorization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec), - oneapi::dpl::__internal::__is_parallelization_preferred<_ExecutionPolicy, _ForwardIterator1, _ForwardIterator2>( - __exec)); + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __op); } template diff --git a/include/oneapi/dpl/pstl/glue_numeric_ranges_impl.h b/include/oneapi/dpl/pstl/glue_numeric_ranges_impl.h index 42d7c6e15a3..521ebee46b7 100644 --- a/include/oneapi/dpl/pstl/glue_numeric_ranges_impl.h +++ b/include/oneapi/dpl/pstl/glue_numeric_ranges_impl.h @@ -63,9 +63,11 @@ template transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Tp __init) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + using _ValueType = oneapi::dpl::__internal::__value_t<_Range1>; return oneapi::dpl::__internal::__ranges::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __init, ::std::plus<_ValueType>(), ::std::multiplies<_ValueType>()); } @@ -76,8 +78,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_read(::std::forward<_Range2>(__rng2)), __init, __binary_op1, __binary_op2); } @@ -86,9 +90,11 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp> transform_reduce(_ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { - return oneapi::dpl::__internal::__ranges::__pattern_transform_reduce(::std::forward<_ExecutionPolicy>(__exec), - views::all_read(::std::forward<_Range>(__rng)), - __init, __binary_op, __unary_op); + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng); + + return oneapi::dpl::__internal::__ranges::__pattern_transform_reduce( + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range>(__rng)), + __init, __binary_op, __unary_op); } // [exclusive.scan] @@ -154,8 +160,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, transform_exclusive_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_write(::std::forward<_Range2>(__rng2)), __unary_op, __init, __binary_op, /*inclusive=*/::std::false_type()); } @@ -169,8 +177,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, transform_inclusive_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _Tp __init) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_write(::std::forward<_Range2>(__rng2)), __unary_op, __init, __binary_op, /*inclusive=*/::std::true_type()); } @@ -182,8 +192,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, transform_inclusive_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryOperation __binary_op, _UnaryOperation __unary_op) { + const auto __dispatch_tag = oneapi::dpl::__ranges::__select_backend(__exec, __rng1, __rng2); + return oneapi::dpl::__internal::__ranges::__pattern_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), + __dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng1)), views::all_write(::std::forward<_Range2>(__rng2)), __unary_op, __binary_op, /*inclusive=*/::std::true_type()); } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 677be98e975..a630c1cc2dd 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -39,10 +39,10 @@ namespace __internal // walk1 //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +void +__pattern_walk1(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _Function __f) { auto __n = __last - __first; if (__n <= 0) @@ -52,9 +52,8 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _ForwardIterator>(); auto __buf = __keep(__first, __last); - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), - unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf.all_view()) + oneapi::dpl::__par_backend_hetero::__parallel_for( + _BackendTag{}, __exec, unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf.all_view()) .wait(); } @@ -62,13 +61,13 @@ __pattern_walk1(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIte // walk1_n //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk1_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_ForwardIterator +__pattern_walk1_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, + _Function __f) { - __pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f, - /*vector=*/::std::true_type(), /*parallel=*/::std::true_type()); + __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f); return __first + __n; } @@ -82,10 +81,11 @@ __pattern_walk1_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) + typename _BackendTag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2, + typename _Function> +_ForwardIterator2 +__pattern_walk2(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) { auto __n = __last1 - __first1; if (__n <= 0) @@ -98,8 +98,8 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI auto __buf2 = __keep2(__first2, __first2 + __n); auto __future_obj = oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, - __buf1.all_view(), __buf2.all_view()); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view()); if constexpr (_IsSync()) __future_obj.wait(); @@ -107,45 +107,42 @@ __pattern_walk2(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI return __first2 + __n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, - _Function __f, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_ForwardIterator2 +__pattern_walk2_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, + _ForwardIterator2 __first2, _Function __f) { - return __pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f, - ::std::true_type(), ::std::true_type()); + return __pattern_walk2(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, __first2, __f); } //------------------------------------------------------------------------ // swap //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_swap(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __f, /*is_vector=*/::std::true_type, - /*is_parallel=*/::std::true_type) +template +_ForwardIterator2 +__pattern_swap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f) { return __pattern_walk2(::std::forward<_ExecutionPolicy>(__exec), - __first1, __last1, __first2, __f, - ::std::true_type(), ::std::true_type()); + __par_backend_hetero::access_mode::read_write>( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __f); } //------------------------------------------------------------------------ // walk3 //------------------------------------------------------------------------ -template <__par_backend_hetero::access_mode __acc_mode1 = __par_backend_hetero::access_mode::read, +template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +_ForwardIterator3 +__pattern_walk3(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) { auto __n = __last1 - __first1; if (__n <= 0) @@ -158,7 +155,7 @@ __pattern_walk3(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardI auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__acc_mode3, _ForwardIterator3>(); auto __buf3 = __keep3(__first3, __first3 + __n); - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, __buf1.all_view(), __buf2.all_view(), __buf3.all_view()) .wait(); @@ -175,18 +172,18 @@ struct __walk_brick_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_walk_brick(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f, - /*parallel=*/::std::true_type) +template +void +__pattern_walk_brick(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Function __f) { if (__last - __first <= 0) return; __pattern_walk1( + __tag, __par_backend_hetero::make_wrapped_policy<__walk_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first, __last, __f, - /*vector=*/::std::true_type{}, /*parallel=*/::std::true_type{}); + __first, __last, __f); } template @@ -194,15 +191,16 @@ struct __walk_brick_n_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_walk_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f, - /*parallel=*/::std::true_type) +template +_ForwardIterator +__pattern_walk_brick_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, + _Function __f) { __pattern_walk1( + __tag, __par_backend_hetero::make_wrapped_policy<__walk_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first, __first + __n, __f, - /*vector=*/::std::true_type{}, /*parallel=*/::std::true_type{}); + __first, __first + __n, __f); return __first + __n; } @@ -215,15 +213,16 @@ struct __walk2_brick_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Brick __brick, /*parallel*/ ::std::true_type) +template +_ForwardIterator2 +__pattern_walk2_brick(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Brick __brick) { return __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy<__walk2_brick_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __first2, __brick, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + __first1, __last1, __first2, __brick); } template @@ -231,17 +230,16 @@ struct __walk2_brick_n_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_brick_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, - _Brick __brick, /*parallel*/ ::std::true_type) +template +_ForwardIterator2 +__pattern_walk2_brick_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _Size __n, _ForwardIterator2 __first2, _Brick __brick) { - return __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy<__walk2_brick_n_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), - __first1, __first1 + __n, __first2, __brick, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + __first1, __first1 + __n, __first2, __brick); } //------------------------------------------------------------------------ @@ -253,21 +251,20 @@ struct __walk2_transform_if_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_walk2_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _Function __func, - /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +_ForwardIterator2 +__pattern_walk2_transform_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __func) { // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming // values of the output sequence for elements where the predicate is false. return __pattern_walk2( + __tag, __par_backend_hetero::make_wrapped_policy<__walk2_transform_if_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __first2, __func, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + __first1, __last1, __first2, __func); } template @@ -275,22 +272,21 @@ struct __walk3_transform_if_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator3> -__pattern_walk3_transform_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __func, - /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +_ForwardIterator3 +__pattern_walk3_transform_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator3 __first3, + _Function __func) { // Require `read_write` access mode for output sequence to force a copy in for host iterators to capture incoming // values of the output sequence for elements where the predicate is false. - return __pattern_walk3<__par_backend_hetero::access_mode::read, __par_backend_hetero::access_mode::read, - __par_backend_hetero::access_mode::read_write>( + return __pattern_walk3<_BackendTag, __par_backend_hetero::access_mode::read, + __par_backend_hetero::access_mode::read, __par_backend_hetero::access_mode::read_write>( + __tag, __par_backend_hetero::make_wrapped_policy<__walk3_transform_if_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __first2, __first3, __func, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + __first1, __last1, __first2, __first3, __func); } //------------------------------------------------------------------------ @@ -309,18 +305,26 @@ struct fill_functor } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _T& __value, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_ForwardIterator +__pattern_fill(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, const _T& __value) { - __pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), + __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), - fill_functor<_T>{__value}, ::std::true_type{}, ::std::true_type{}); + fill_functor<_T>{__value}); return __last; } +template +_ForwardIterator +__pattern_fill_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, + const _T& __value) +{ + return __pattern_fill(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __value); +} + //------------------------------------------------------------------------ // generate //------------------------------------------------------------------------ @@ -338,24 +342,32 @@ struct generate_functor } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_ForwardIterator +__pattern_generate(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Generator __g) { - __pattern_walk1(::std::forward<_ExecutionPolicy>(__exec), + __pattern_walk1(__tag, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__last), - generate_functor<_Generator>{__g}, ::std::true_type{}, ::std::true_type{}); + generate_functor<_Generator>{__g}); return __last; } +template +_ForwardIterator +__pattern_generate_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _Size __count, _Generator __g) +{ + return __pattern_generate(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + __count, __g); +} + //------------------------------------------------------------------------ // brick_copy, brick_move //------------------------------------------------------------------------ -template -struct __brick_copy_n<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy>> +template +struct __brick_copy_n<__hetero_tag<_BackendTag>, _ExecutionPolicy> { template void @@ -365,48 +377,46 @@ struct __brick_copy_n<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_het } }; -template -struct __brick_copy<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy>> +template +struct __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy> { template - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> + void operator()(_SourceT&& __source, _TargetT&& __target) const { __target = ::std::forward<_SourceT>(__source); } }; -template -struct __brick_move<_ExecutionPolicy, oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy>> +template +struct __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy> { template - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> + void operator()(_SourceT&& __source, _TargetT&& __target) const { __target = ::std::move(__source); } }; -template -struct __brick_fill<_SourceT, _ExecutionPolicy, - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy>> +template +struct __brick_fill<__hetero_tag<_BackendTag>, _ExecutionPolicy, _SourceT> { _SourceT __value; template - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> + void operator()(_TargetT& __target) const { __target = __value; } }; -template -struct __brick_fill_n<_SourceT, _ExecutionPolicy, - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy>> +template +struct __brick_fill_n<__hetero_tag<_BackendTag>, _ExecutionPolicy, _SourceT> { _SourceT __value; template - oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> + void operator()(_TargetT& __target) const { __target = __value; @@ -417,10 +427,10 @@ struct __brick_fill_n<_SourceT, _ExecutionPolicy, // min_element, max_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_min_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator +__pattern_min_element(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp) { if (__first == __last) return __last; @@ -467,7 +477,7 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ auto __buf = __keep(__first, __last); auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -493,10 +503,10 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ // However the solution requires use of custom pattern or substantial redesign of existing parallel_transform_reduce. // -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, ::std::pair<_Iterator, _Iterator>> -__pattern_minmax_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +::std::pair<_Iterator, _Iterator> +__pattern_minmax_element(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp) { if (__first == __last) return ::std::make_pair(__first, __first); @@ -535,7 +545,7 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator auto __ret = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -547,11 +557,10 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator // adjacent_find //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _BinaryPredicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type, - oneapi::dpl::__internal::__or_semantic) +template +_Iterator +__pattern_adjacent_find(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _BinaryPredicate __predicate, oneapi::dpl::__internal::__or_semantic) { if (__last - __first < 2) return __last; @@ -567,8 +576,8 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() bool result = __par_backend_hetero::__parallel_find_or( - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, - __par_backend_hetero::__parallel_or_tag{}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); // inverted conditional because of @@ -576,11 +585,10 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator return result ? __first : __last; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _BinaryPredicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type, - oneapi::dpl::__internal::__first_semantic) +template +_Iterator +__pattern_adjacent_find(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _BinaryPredicate __predicate, oneapi::dpl::__internal::__first_semantic) { if (__last - __first < 2) return __last; @@ -589,7 +597,7 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, adjacent_find_fn<_BinaryPredicate>>; auto __result = __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first + 1)), @@ -609,11 +617,10 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator // count, count_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy< - _ExecutionPolicy, typename ::std::iterator_traits<_Iterator>::difference_type> -__pattern_count(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Predicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type) +template +typename ::std::iterator_traits<_Iterator>::difference_type +__pattern_count(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Predicate __predicate) { if (__first == __last) return 0; @@ -632,7 +639,7 @@ __pattern_count(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -642,10 +649,10 @@ __pattern_count(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, // any_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +bool +__pattern_any_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Pred __pred) { if (__first == __last) return false; @@ -656,6 +663,7 @@ __pattern_any_of(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, __par_backend_hetero::__parallel_or_tag{}, __buf.all_view()); @@ -665,11 +673,10 @@ __pattern_any_of(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, // equal //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, - _Iterator2 __last2, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +bool +__pattern_equal(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Iterator2 __last2, _Pred __pred) { if (__last1 == __first1 || __last2 == __first2 || __last1 - __first1 != __last2 - __first2) return false; @@ -684,7 +691,7 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !__par_backend_hetero::__parallel_find_or( - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, __par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::make_zip_view(__buf1.all_view(), __buf2.all_view())); } @@ -693,24 +700,23 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las // equal version for sequences with equal length //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +bool +__pattern_equal(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Pred __pred) { - return oneapi::dpl::__internal::__pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, - __first2, __first2 + (__last1 - __first1), __pred, - /*vector=*/::std::true_type{}, /*parallel=*/::std::true_type{}); + return oneapi::dpl::__internal::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __first2, __first2 + (__last1 - __first1), __pred); } //------------------------------------------------------------------------ // find_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_find_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Pred __pred, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator +__pattern_find_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Pred __pred) { if (__first == __last) return __last; @@ -718,7 +724,7 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, _Pred>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__pred}, ::std::true_type{}); @@ -728,18 +734,18 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last // find_end //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator1> -__pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator1 +__pattern_find_end(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) { if (__first == __last || __s_last == __s_first || __last - __first < __s_last - __s_first) return __last; if (__last - __first == __s_last - __s_first) { - const bool __res = __pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred, - ::std::true_type(), ::std::true_type()); + const bool __res = + __pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __pred); return __res ? __first : __last; } else @@ -747,7 +753,7 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __l using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -760,10 +766,10 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __l // find_first_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator1> -__pattern_find_first_of(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator1 +__pattern_find_first_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) { if (__first == __last || __s_last == __s_first) return __last; @@ -773,7 +779,7 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator // TODO: To check whether it makes sense to iterate over the second sequence in case of // distance(__first, __last) < distance(__s_first, __s_last). return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -790,10 +796,10 @@ class equal_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator1> -__pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator1 +__pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __s_first, _Iterator2 __s_last, _Pred __pred) { if (__s_last == __s_first) return __first; @@ -804,14 +810,14 @@ __pattern_search(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __las if (__last - __first == __s_last - __s_first) { const bool __res = __pattern_equal( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __first, - __last, __s_first, __pred, ::std::true_type(), ::std::true_type()); + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __first, __last, __s_first, __pred); return __res ? __first : __last; } using _Predicate = unseq_backend::multiple_match_pred<_ExecutionPolicy, _Pred>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__s_first), @@ -837,10 +843,11 @@ struct __search_n_unary_predicate } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_search_n(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Size __count, const _Tp& __value, - _BinaryPredicate __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator +__pattern_search_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Size __count, const _Tp& __value, _BinaryPredicate __pred) { if (__count <= 0) return __first; @@ -850,16 +857,15 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __las if (__last - __first == __count) { - return (!__internal::__pattern_any_of(::std::forward<_ExecutionPolicy>(__exec), __first, __last, - __search_n_unary_predicate<_Tp, _BinaryPredicate>{__value, __pred}, - ::std::true_type{}, ::std::true_type{})) + return (!__internal::__pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __search_n_unary_predicate<_Tp, _BinaryPredicate>{__value, __pred})) ? __first : __last; } using _Predicate = unseq_backend::n_elem_match_pred<_ExecutionPolicy, _BinaryPredicate, _Tp, _Size>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__pred, __value, __count}, ::std::true_type{}); @@ -869,10 +875,10 @@ __pattern_search_n(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __las // mismatch //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, ::std::pair<_Iterator1, _Iterator2>> -__pattern_mismatch(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, - _Iterator2 __last2, _Pred __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +::std::pair<_Iterator1, _Iterator2> +__pattern_mismatch(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Iterator2 __last2, _Pred __pred) { auto __n = ::std::min(__last1 - __first1, __last2 - __first2); if (__n <= 0) @@ -883,9 +889,9 @@ __pattern_mismatch(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __ auto __first_zip = __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2)); - auto __result = - __par_backend_hetero::__parallel_find(::std::forward<_ExecutionPolicy>(__exec), __first_zip, __first_zip + __n, - _Predicate{equal_predicate<_Pred>{__pred}}, ::std::true_type{}); + auto __result = __par_backend_hetero::__parallel_find( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __first_zip, __first_zip + __n, + _Predicate{equal_predicate<_Pred>{__pred}}, ::std::true_type{}); __n = __result - __first_zip; return ::std::make_pair(__first1 + __n, __first2 + __n); } @@ -894,12 +900,11 @@ __pattern_mismatch(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __ // copy_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy< - _ExecutionPolicy, ::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type>> -__pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _IteratorOrTuple __output_first, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) +template +::std::pair<_IteratorOrTuple, typename ::std::iterator_traits<_Iterator1>::difference_type> +__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _IteratorOrTuple __output_first, _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) { using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; @@ -914,18 +919,19 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __ oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _IteratorOrTuple>(); auto __buf2 = __keep2(__output_first, __output_first + __n); - auto __res = - __par_backend_hetero::__parallel_scan_copy(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __n, __create_mask_op, __copy_by_mask_op); + auto __res = __par_backend_hetero::__parallel_scan_copy(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __n, __create_mask_op, + __copy_by_mask_op); ::std::size_t __num_copied = __res.get(); return ::std::make_pair(__output_first + __n, __num_copied); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result_first, - _Predicate __pred, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator2 +__pattern_copy_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __result_first, _Predicate __pred) { using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; @@ -939,8 +945,8 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __la auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); auto __buf2 = __keep2(__result_first, __result_first + __n); - auto __res = __par_backend_hetero::__parallel_copy_if(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __n, __pred); + auto __res = __par_backend_hetero::__parallel_copy_if(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __n, __pred); ::std::size_t __num_copied = __res.get(); return __result_first + __num_copied; @@ -950,12 +956,11 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __la // partition_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, ::std::pair<_Iterator2, _Iterator3>> -__pattern_partition_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result1, - _Iterator3 __result2, _UnaryPredicate __pred, /*vector*/ ::std::true_type, - /*parallel*/ ::std::true_type) +template +::std::pair<_Iterator2, _Iterator3> +__pattern_partition_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result1, _Iterator3 __result2, _UnaryPredicate __pred) { if (__first == __last) return ::std::make_pair(__result1, __result2); @@ -967,7 +972,7 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterato unseq_backend::__partition_by_mask<_ReduceOp, /*inclusive*/ ::std::true_type> __copy_by_mask_op{_ReduceOp{}}; auto __result = __pattern_scan_copy( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __par_backend_hetero::zip( __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result1), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__result2)), @@ -980,10 +985,11 @@ __pattern_partition_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterato // unique_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_unique_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result_first, - _BinaryPredicate __pred, /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator2 +__pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, + _Iterator2 __result_first, _BinaryPredicate __pred) { using _It1DifferenceType = typename ::std::iterator_traits<_Iterator1>::difference_type; unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, oneapi::dpl::__internal::__pstl_assign, @@ -992,8 +998,8 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{ __not_pred<_BinaryPredicate>{__pred}}; - auto __result = __pattern_scan_copy(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result_first, - __create_mask_op, __copy_by_mask_op); + auto __result = __pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __result_first, __create_mask_op, __copy_by_mask_op); return __result_first + __result.second; } @@ -1007,10 +1013,10 @@ class copy_back_wrapper2 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_remove_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Predicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator +__pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Predicate __pred) { if (__last == __first) return __last; @@ -1019,19 +1025,19 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __la oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); auto __copy_first = __buf.get(); - auto __copy_last = __pattern_copy_if(__exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + + auto __copy_last = __pattern_copy_if(__tag, __exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer return __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __copy_first, __copy_last, __first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_unique(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _BinaryPredicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator +__pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _BinaryPredicate __pred) { if (__last - __first < 2) return __last; @@ -1040,14 +1046,13 @@ __pattern_unique(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __last - __first); auto __copy_first = __buf.get(); - auto __copy_last = __pattern_unique_copy(__exec, __first, __last, __copy_first, __pred, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + auto __copy_last = __pattern_unique_copy(__tag, __exec, __first, __last, __copy_first, __pred); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer return __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __copy_first, __copy_last, __first, __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __copy_first, __copy_last, __first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } //------------------------------------------------------------------------ @@ -1062,10 +1067,10 @@ enum _IsPartitionedReduceType : signed char __true_false }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_is_partitioned(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Predicate __predicate, - /*parallel*/ ::std::true_type, /*vector*/ ::std::true_type) +template +bool +__pattern_is_partitioned(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Predicate __predicate) { if (__last - __first < 2) return true; @@ -1086,7 +1091,7 @@ __pattern_is_partitioned(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator auto __res = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf.all_view()) .get(); @@ -1113,10 +1118,10 @@ struct __is_heap_check } }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _RandomAccessIterator> -__pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, /* vector */ ::std::true_type, /* parallel = */ ::std::true_type) +template +_RandomAccessIterator +__pattern_is_heap_until(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { if (__last - __first < 2) return __last; @@ -1125,16 +1130,16 @@ __pattern_is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; return __par_backend_hetero::__parallel_find( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}, ::std::true_type{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Compare __comp, /* vector */ ::std::true_type, /* parallel = */ ::std::true_type) +template +bool +__pattern_is_heap(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp) { if (__last - __first < 2) return true; @@ -1143,7 +1148,7 @@ __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran oneapi::dpl::unseq_backend::single_match_pred_by_idx<_ExecutionPolicy, __is_heap_check<_Compare>>; return !__par_backend_hetero::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), _Predicate{__comp}); } @@ -1151,11 +1156,12 @@ __pattern_is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Ran //------------------------------------------------------------------------ // merge //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator3> -__pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, _Iterator2 __first2, - _Iterator2 __last2, _Iterator3 __d_first, _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) + +template +_Iterator3 +__pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, + _Iterator2 __first2, _Iterator2 __last2, _Iterator3 __d_first, _Compare __comp) { auto __n1 = __last1 - __first1; auto __n2 = __last2 - __first2; @@ -1166,16 +1172,18 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las //To consider the direct copying pattern call in case just one of sequences is empty. if (__n1 == 0) oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy( ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - ::std::true_type()); + __first2, __last2, __d_first, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); else if (__n2 == 0) oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __d_first, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - ::std::true_type()); + __first1, __last1, __d_first, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); else { auto __keep1 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); @@ -1186,19 +1194,21 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __las auto __keep3 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator3>(); auto __buf3 = __keep3(__d_first, __d_first + __n); - __par_backend_hetero::__parallel_merge(::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), - __buf2.all_view(), __buf3.all_view(), __comp) + __par_backend_hetero::__parallel_merge(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf1.all_view(), __buf2.all_view(), __buf3.all_view(), __comp) .wait(); } return __d_first + __n; } + //------------------------------------------------------------------------ // inplace_merge //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_inplace_merge(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __middle, _Iterator __last, - _Compare __comp, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) + +template +void +__pattern_inplace_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __middle, _Iterator __last, _Compare __comp) { using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; @@ -1212,26 +1222,26 @@ __pattern_inplace_merge(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator auto __copy_first = __buf.get(); auto __copy_last = __copy_first + __n; - __pattern_merge(__exec, __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), - __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__copy_first), - __comp, ::std::true_type{}, ::std::true_type{}); + __pattern_merge( + __tag, __exec, __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__middle), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last), + __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__copy_first), __comp); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __copy_first, __copy_last, __first, __brick_move<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __copy_first, __copy_last, __first, __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } //------------------------------------------------------------------------ // sort //------------------------------------------------------------------------ -template +template void -__stable_sort_with_projection(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - _Proj __proj) +__stable_sort_with_projection(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp, _Proj __proj) { if (__last - __first < 2) return; @@ -1239,60 +1249,58 @@ __stable_sort_with_projection(_ExecutionPolicy&& __exec, _Iterator __first, _Ite auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - __par_backend_hetero::__parallel_stable_sort( - ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __comp, __proj).wait(); + __par_backend_hetero::__parallel_stable_sort(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __buf.all_view(), __comp, __proj) + .wait(); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type, /*is_move_constructible=*/::std::true_type) +template +void +__pattern_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp, /*is_move_constructible=*/::std::true_type) { - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __stable_sort_with_projection(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::identity{}); } //------------------------------------------------------------------------ // stable_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_stable_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) + +template +void +__pattern_stable_sort(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _Compare __comp) { - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, + __stable_sort_with_projection(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp, oneapi::dpl::identity{}); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_sort_by_key(_ExecutionPolicy&& __exec, _Iterator1 __keys_first, _Iterator1 __keys_last, - _Iterator2 __values_first, _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +void +__pattern_sort_by_key(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __keys_first, + _Iterator1 __keys_last, _Iterator2 __values_first, _Compare __comp) { - static_assert(::std::is_move_constructible_v::value_type> - && ::std::is_move_constructible_v::value_type>, - "The keys and values should be move constructible in case of parallel execution."); + static_assert(::std::is_move_constructible_v::value_type> && + ::std::is_move_constructible_v::value_type>, + "The keys and values should be move constructible in case of parallel execution."); auto __beg = oneapi::dpl::make_zip_iterator(__keys_first, __values_first); auto __end = __beg + (__keys_last - __keys_first); - __stable_sort_with_projection(::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __comp, + __stable_sort_with_projection(__tag, ::std::forward<_ExecutionPolicy>(__exec), __beg, __end, __comp, [](const auto& __a) { return ::std::get<0>(__a); }); } - -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_stable_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _UnaryPredicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator +__pattern_stable_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _UnaryPredicate __pred) { if (__last == __first) return __last; else if (__last - __first < 2) - return __pattern_any_of(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, ::std::true_type(), - ::std::true_type()) - ? __last - : __first; + return __pattern_any_of(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred) ? __last + : __first; using _ValueType = typename ::std::iterator_traits<_Iterator>::value_type; @@ -1303,42 +1311,39 @@ __pattern_stable_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterat auto __true_result = __true_buf.get(); auto __false_result = __false_buf.get(); - auto copy_result = __pattern_partition_copy(__exec, __first, __last, __true_result, __false_result, __pred, - /*vector=*/::std::true_type{}, /*parallel*/ ::std::true_type{}); + auto copy_result = __pattern_partition_copy(__tag, __exec, __first, __last, __true_result, __false_result, __pred); auto true_count = copy_result.first - __true_result; //TODO: optimize copy back if possible (inplace, decrease number of submits) __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(__exec), - __true_result, copy_result.first, __first, __brick_move<_ExecutionPolicy>{}, ::std::true_type{}, - ::std::true_type{}); + __tag, __par_backend_hetero::make_wrapped_policy(__exec), __true_result, copy_result.first, + __first, __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); + __pattern_walk2( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), - __false_result, copy_result.second, __first + true_count, __brick_move<_ExecutionPolicy>{}, ::std::true_type{}, - ::std::true_type{}); + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + __false_result, copy_result.second, __first + true_count, + __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); return __first + true_count; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_partition(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _UnaryPredicate __pred, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +_Iterator +__pattern_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + _UnaryPredicate __pred) { //TODO: consider nonstable approaches - return __pattern_stable_partition(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred, - ::std::true_type(), ::std::true_type()); + return __pattern_stable_partition(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred); } //------------------------------------------------------------------------ // lexicographical_compare //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _Iterator1 __first1, _Iterator1 __last1, - _Iterator2 __first2, _Iterator2 __last2, _Compare __comp, /*vector*/ ::std::true_type, - /*parallel*/ ::std::true_type) +template +bool +__pattern_lexicographical_compare(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 __first1, + _Iterator1 __last1, _Iterator2 __first2, _Iterator2 __last2, _Compare __comp) { //trivial pre-checks if (__first2 == __last2) @@ -1375,7 +1380,7 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _Iterator1 __first1 auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value __buf1.all_view(), __buf2.all_view()) .get(); @@ -1383,11 +1388,11 @@ __pattern_lexicographical_compare(_ExecutionPolicy&& __exec, _Iterator1 __first1 return __ret_idx ? __ret_idx == 1 : (__last1 - __first1) < (__last2 - __first2); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +bool +__pattern_includes(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) { //according to the spec if (__first2 == __last2) @@ -1403,7 +1408,7 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa using __brick_include_type = unseq_backend::__brick_includes<_ExecutionPolicy, _Compare, _Size1, _Size2>; return !__par_backend_hetero::__parallel_or( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first2), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__last2), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read>(__first1), @@ -1414,16 +1419,17 @@ __pattern_includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forwa //------------------------------------------------------------------------ // partial_sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, _Iterator __last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) + +template +void +__pattern_partial_sort(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, + _Iterator __last, _Compare __comp) { if (__last - __first < 2) return; __par_backend_hetero::__parallel_partial_sort( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__last), __comp) @@ -1459,11 +1465,11 @@ struct __partial_sort_2 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutIterator> -__pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InIterator __last, - _OutIterator __out_first, _OutIterator __out_last, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_OutIterator +__pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _InIterator __first, + _InIterator __last, _OutIterator __out_first, _OutIterator __out_last, _Compare __comp) { using _ValueType = typename ::std::iterator_traits<_InIterator>::value_type; @@ -1481,13 +1487,14 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI // If our output buffer is larger than the input buffer, simply copy elements to the output and use // full sort on them. auto __out_end = __pattern_walk2( - __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, __out_first, - __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + __tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, __out_first, + __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); - // Use reqular sort as partial_sort isn't required to be stable + // Use regular sort as partial_sort isn't required to be stable __pattern_sort( + __tag, __par_backend_hetero::make_wrapped_policy<__partial_sort_1>(::std::forward<_ExecutionPolicy>(__exec)), - __out_first, __out_end, __comp, ::std::true_type{}, ::std::true_type{}, ::std::true_type{}); + __out_first, __out_end, __comp, ::std::true_type{}); return __out_end; } @@ -1500,22 +1507,22 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __in_size); auto __buf_first = __buf.get(); + auto __buf_last = __pattern_walk2( - __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, __buf_first, - __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, ::std::true_type{}); + __tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_2>(__exec), __first, __last, __buf_first, + __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); auto __buf_mid = __buf_first + __out_size; __par_backend_hetero::__parallel_partial_sort( - __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_last), __comp); return __pattern_walk2( - __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), - __buf_first, __buf_mid, __out_first, __brick_copy<_ExecutionPolicy>{}, ::std::true_type{}, - ::std::true_type{}); + __tag, __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), + __buf_first, __buf_mid, __out_first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } } @@ -1523,10 +1530,10 @@ __pattern_partial_sort_copy(_ExecutionPolicy&& __exec, _InIterator __first, _InI // nth_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_nth_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __nth, _Iterator __last, _Compare __comp, - /*vector*/ ::std::true_type, /*parallel*/ ::std::true_type) +template +void +__pattern_nth_element(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __nth, + _Iterator __last, _Compare __comp) { if (__first == __last || __nth == __last) return; @@ -1534,17 +1541,16 @@ __pattern_nth_element(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ // TODO: check partition-based implementation // - try to avoid host dereference issue // - measure performance of the issue-free implementation - __pattern_partial_sort(::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, __comp, - /*vector*/ ::std::true_type{}, /*parallel*/ ::std::true_type{}); + __pattern_partial_sort(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __nth + 1, __last, __comp); } //------------------------------------------------------------------------ // reverse //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_reverse(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) + +template +void +__pattern_reverse(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last) { auto __n = __last - __first; if (__n <= 0) @@ -1553,7 +1559,7 @@ __pattern_reverse(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__reverse_functor::difference_type>{__n}, __n / 2, __buf.all_view()) .wait(); @@ -1562,10 +1568,11 @@ __pattern_reverse(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last //------------------------------------------------------------------------ // reverse_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last, - _ForwardIterator __result, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) + +template +_ForwardIterator +__pattern_reverse_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, + _BidirectionalIterator __last, _ForwardIterator __result) { auto __n = __last - __first; if (__n <= 0) @@ -1578,7 +1585,7 @@ __pattern_reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator>(); auto __buf2 = __keep2(__result, __result + __n); oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__reverse_copy::difference_type>{__n}, __n, __buf1.all_view(), __buf2.all_view()) .wait(); @@ -1599,10 +1606,10 @@ class __rotate_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_rotate(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_first, _Iterator __last, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator +__pattern_rotate(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_first, + _Iterator __last) { auto __n = __last - __first; if (__n <= 0) @@ -1619,15 +1626,15 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_f const auto __shift = __new_first - __first; oneapi::dpl::__par_backend_hetero::__parallel_for( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, __buf.all_view(), __temp_rng); - using _Function = __brick_move<_ExecutionPolicy>; + using _Function = __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>; auto __brick = unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __brick, __n, - __temp_rng, __buf.all_view()) + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __brick, + __n, __temp_rng, __buf.all_view()) .wait(); return __first + (__last - __new_first); @@ -1636,11 +1643,11 @@ __pattern_rotate(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __new_f //------------------------------------------------------------------------ // rotate_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator> -__pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __new_first, - _BidirectionalIterator __last, _ForwardIterator __result, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) + +template +_ForwardIterator +__pattern_rotate_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _BidirectionalIterator __first, + _BidirectionalIterator __new_first, _BidirectionalIterator __last, _ForwardIterator __result) { auto __n = __last - __first; if (__n <= 0) @@ -1656,7 +1663,7 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, const auto __shift = __new_first - __first; oneapi::dpl::__par_backend_hetero::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, __buf1.all_view(), __buf2.all_view()) @@ -1665,12 +1672,12 @@ __pattern_rotate_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, return __result + __n; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_hetero_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, _IsOpDifference) +template +_OutputIterator +__pattern_hetero_set_op(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp, _IsOpDifference) { typedef typename ::std::iterator_traits<_ForwardIterator1>::difference_type _Size1; typedef typename ::std::iterator_traits<_ForwardIterator2>::difference_type _Size2; @@ -1709,7 +1716,7 @@ __pattern_hetero_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ auto __result_size = __par_backend_hetero::__parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::make_zip_view( __buf1.all_view(), __buf2.all_view(), oneapi::dpl::__ranges::all_view( @@ -1730,19 +1737,19 @@ __pattern_hetero_set_op(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ return __result + __result_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_OutputIterator +__pattern_set_intersection(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp) { // intersection is empty if (__first1 == __last1 || __first2 == __last2) return __result; - return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, - __result, __comp, unseq_backend::_IntersectionTag()); + return __pattern_hetero_set_op(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + __last2, __result, __comp, unseq_backend::_IntersectionTag()); } //Dummy names to avoid kernel problems @@ -1751,13 +1758,12 @@ class __set_difference_copy_case_1 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +_OutputIterator +__pattern_set_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp) { // {} \ {2}: the difference is empty if (__first1 == __last1) @@ -1767,13 +1773,15 @@ __pattern_set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, if (__first2 == __last2) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_difference_copy_case_1>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first1, __last1, __result, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } - return __pattern_hetero_set_op(::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, - __result, __comp, unseq_backend::_DifferenceTag()); + return __pattern_hetero_set_op(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, + __last2, __result, __comp, unseq_backend::_DifferenceTag()); } //Dummy names to avoid kernel problems @@ -1787,12 +1795,12 @@ class __set_union_copy_case_2 { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_OutputIterator +__pattern_set_union(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, + _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, + _OutputIterator __result, _Compare __comp) { if (__first1 == __last1 && __first2 == __last2) return __result; @@ -1801,18 +1809,22 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forw if (__first1 == __last1) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_1>( ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first2, __last2, __result, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } //{2} is empty if (__first2 == __last2) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first1, __last1, __result, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } typedef typename ::std::iterator_traits<_OutputIterator>::value_type _ValueType; @@ -1823,15 +1835,17 @@ __pattern_set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _Forw auto __buf = __diff.get(); //1. Calc difference {2} \ {1} - const auto __n_diff = oneapi::dpl::__internal::__pattern_hetero_set_op(__exec,__first2, __last2, __first1, __last1, - __buf,__comp, unseq_backend::_DifferenceTag() - ) - __buf; + const auto __n_diff = + oneapi::dpl::__internal::__pattern_hetero_set_op(__tag, __exec, __first2, __last2, __first1, __last1, __buf, + __comp, unseq_backend::_DifferenceTag()) - + __buf; + //2. Merge {1} and the difference return oneapi::dpl::__internal::__pattern_merge( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_union_copy_case_2>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __buf, __buf + __n_diff, __result, __comp, - /*vector=*/::std::true_type(), /*parallel=*/::std::true_type()); + __first1, __last1, __buf, __buf + __n_diff, __result, __comp); } //Dummy names to avoid kernel problems @@ -1862,13 +1876,12 @@ class __set_symmetric_difference_phase_2 // 1. Calc difference {1} \ {2} // 2. Calc difference {2} \ {1} // 3. Merge the differences -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, - _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result, - _Compare __comp, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +_OutputIterator +__pattern_set_symmetric_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, + _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, + _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp) { if (__first1 == __last1 && __first2 == __last2) return __result; @@ -1877,18 +1890,22 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 if (__first1 == __last1) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_1>( ::std::forward<_ExecutionPolicy>(__exec)), - __first2, __last2, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first2, __last2, __result, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } //{2} is empty if (__first2 == __last2) { return oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_copy_case_2>( ::std::forward<_ExecutionPolicy>(__exec)), - __first1, __last1, __result, oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::true_type()); + __first1, __last1, __result, + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } typedef typename ::std::iterator_traits<_OutputIterator>::value_type _ValueType; @@ -1904,21 +1921,21 @@ __pattern_set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 //1. Calc difference {1} \ {2} const auto __n_diff_1 = oneapi::dpl::__internal::__pattern_hetero_set_op( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_1>(__exec), + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_1>(__exec), __first1, __last1, __first2, __last2, __buf_1, __comp, unseq_backend::_DifferenceTag()) - __buf_1; //2. Calc difference {2} \ {1} const auto __n_diff_2 = oneapi::dpl::__internal::__pattern_hetero_set_op( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_2>(__exec), + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__set_symmetric_difference_phase_2>(__exec), __first2, __last2, __first1, __last1, __buf_2, __comp, unseq_backend::_DifferenceTag()) - __buf_2; //3. Merge the differences - return oneapi::dpl::__internal::__pattern_merge(::std::forward<_ExecutionPolicy>(__exec), __buf_1, + return oneapi::dpl::__internal::__pattern_merge(__tag, ::std::forward<_ExecutionPolicy>(__exec), __buf_1, __buf_1 + __n_diff_1, __buf_2, __buf_2 + __n_diff_2, __result, - __comp, ::std::true_type(), ::std::true_type()); + __comp); } template @@ -1926,10 +1943,10 @@ class __shift_left_right { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_shift_left(_ExecutionPolicy&& __exec, _Range __rng, oneapi::dpl::__internal::__difference_t<_Range> __n) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_shift_left(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range __rng, + oneapi::dpl::__internal::__difference_t<_Range> __n) { //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. using _DiffType = oneapi::dpl::__internal::__difference_t<_Range>; @@ -1943,21 +1960,22 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _Range __rng, oneapi::dpl::__int //1. n >= size/2; 'size - _n' parallel copying if (__n >= __mid) { - using _Function = __brick_move<_ExecutionPolicy>; + using _Function = __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>; auto __brick = oneapi::dpl::unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; //TODO: to consider use just "read" access mode for a source range and just "write" - for a destination range. auto __src = oneapi::dpl::__ranges::drop_view_simple<_Range, _DiffType>(__rng, __n); auto __dst = oneapi::dpl::__ranges::take_view_simple<_Range, _DiffType>(__rng, __size_res); - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __brick, __size_res, - __src, __dst) + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + __brick, __size_res, __src, __dst) .wait(); } else //2. n < size/2; 'n' parallel copying { auto __brick = unseq_backend::__brick_shift_left<_ExecutionPolicy, _DiffType>{__size, __n}; oneapi::dpl::__par_backend_hetero::__parallel_for( + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__shift_left_right>( ::std::forward<_ExecutionPolicy>(__exec)), __brick, __n, __rng) @@ -1967,11 +1985,10 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _Range __rng, oneapi::dpl::__int return __size_res; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_shift_left(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, - typename ::std::iterator_traits<_Iterator>::difference_type __n, /*vector=*/::std::true_type, - /*is_parallel=*/::std::true_type) +template +_Iterator +__pattern_shift_left(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + typename ::std::iterator_traits<_Iterator>::difference_type __n) { //If (n > 0 && n < m), returns first + (m - n). Otherwise, if n > 0, returns first. Otherwise, returns last. auto __size = __last - __first; @@ -1983,16 +2000,15 @@ __pattern_shift_left(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __l auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - auto __res = - oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __n); + auto __res = oneapi::dpl::__internal::__pattern_shift_left(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __buf.all_view(), __n); return __first + __res; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator> -__pattern_shift_right(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, - typename ::std::iterator_traits<_Iterator>::difference_type __n, /*vector=*/::std::true_type, - /*is_parallel=*/::std::true_type) +template +_Iterator +__pattern_shift_right(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, + typename ::std::iterator_traits<_Iterator>::difference_type __n) { //If (n > 0 && n < m), returns first + n. Otherwise, if n > 0, returns last. Otherwise, returns first. auto __size = __last - __first; @@ -2006,7 +2022,8 @@ __pattern_shift_right(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __ //A shift right is the shift left with a reverse logic. auto __rng = oneapi::dpl::__ranges::reverse_view_simple{__buf.all_view()}; - auto __res = oneapi::dpl::__internal::__pattern_shift_left(::std::forward<_ExecutionPolicy>(__exec), __rng, __n); + auto __res = + oneapi::dpl::__internal::__pattern_shift_left(__tag, ::std::forward<_ExecutionPolicy>(__exec), __rng, __n); return __last - __res; } diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index bee1d1f1f69..25ce34a8680 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -39,14 +39,14 @@ namespace __ranges // walk_n //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_walk_n(_ExecutionPolicy&& __exec, _Function __f, _Ranges&&... __rngs) +template +void +__pattern_walk_n(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Function __f, _Ranges&&... __rngs) { auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); if (__n > 0) { - oneapi::dpl::__par_backend_hetero::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), unseq_backend::walk_n<_ExecutionPolicy, _Function>{__f}, __n, ::std::forward<_Ranges>(__rngs)...) .wait(); @@ -67,13 +67,15 @@ class __swap2_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_swap(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Function __f) +template +bool +__pattern_swap(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Function __f) { if (__rng1.size() <= __rng2.size()) { oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__swap1_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, __rng1, __rng2); @@ -81,6 +83,7 @@ __pattern_swap(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _F } oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__swap2_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, __rng2, __rng1); @@ -91,9 +94,9 @@ __pattern_swap(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _F // equal //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_equal(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +bool +__pattern_equal(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) { if (__rng1.empty() || __rng2.empty() || __rng1.size() != __rng2.size()) return false; @@ -103,7 +106,7 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _ // TODO: in case of confilicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !oneapi::dpl::__par_backend_hetero::__parallel_find_or( - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, oneapi::dpl::__par_backend_hetero::__parallel_or_tag{}, oneapi::dpl::__ranges::zip_view(::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2))); } @@ -112,10 +115,9 @@ __pattern_equal(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _ // find_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_find_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_find_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) { //trivial pre-checks if (__rng.empty()) @@ -125,6 +127,7 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) using _TagType = oneapi::dpl::__par_backend_hetero::__parallel_find_forward_tag<_Range>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range>(__rng)); @@ -134,10 +137,10 @@ __pattern_find_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) // find_end //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_find_end(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Pred __pred) { //trivial pre-checks if (__rng1.empty() || __rng2.empty() || __rng1.size() < __rng2.size()) @@ -145,8 +148,8 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2 if (__rng1.size() == __rng2.size()) { - const bool __res = __pattern_equal(::std::forward<_ExecutionPolicy>(__exec), __rng1, - ::std::forward<_Range2>(__rng2), __pred); + const bool __res = __ranges::__pattern_equal(__tag, ::std::forward<_ExecutionPolicy>(__exec), __rng1, + ::std::forward<_Range2>(__rng2), __pred); return __res ? 0 : __rng1.size(); } @@ -154,6 +157,7 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2 using _TagType = __par_backend_hetero::__parallel_find_backward_tag<_Range1>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)); @@ -163,10 +167,10 @@ __pattern_find_end(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2 // find_first_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_find_first_of(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_find_first_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Pred __pred) { //trivial pre-checks if (__rng1.empty() || __rng2.empty()) @@ -177,6 +181,7 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& _ //TODO: To check whether it makes sense to iterate over the second sequence in case of __rng1.size() < __rng2.size() return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)); @@ -186,15 +191,16 @@ __pattern_find_first_of(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& _ // any_of //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, bool> -__pattern_any_of(_ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) +template +bool +__pattern_any_of(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Pred __pred) { if (__rng.empty()) return false; using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, _Pred>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + _BackendTag{}, __par_backend_hetero::make_wrapped_policy( ::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, oneapi::dpl::__par_backend_hetero::__parallel_or_tag{}, ::std::forward<_Range>(__rng)); @@ -209,10 +215,10 @@ class equal_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Pred __pred) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_search(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Pred __pred) { //trivial pre-checks if (__rng2.empty()) @@ -222,8 +228,8 @@ __pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, if (__rng1.size() == __rng2.size()) { - const bool __res = __pattern_equal( - __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), + const bool __res = __ranges::__pattern_equal( + __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __rng1, ::std::forward<_Range2>(__rng2), __pred); return __res ? 0 : __rng1.size(); } @@ -232,8 +238,9 @@ __pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, using _TagType = oneapi::dpl::__par_backend_hetero::__parallel_find_forward_tag<_Range1>; return oneapi::dpl::__par_backend_hetero::__parallel_find_or( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy - (::std::forward<_ExecutionPolicy>(__exec)), + _BackendTag{}, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy< + oneapi::dpl::__par_backend_hetero::__find_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), _Predicate{__pred}, _TagType{}, ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)); } @@ -241,18 +248,19 @@ __pattern_search(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, // search_n //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_search_n(_ExecutionPolicy&& __exec, _Range&& __rng, _Size __count, const _Tp& __value, - _BinaryPredicate __pred) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_search_n(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range&& __rng, _Size __count, + const _Tp& __value, _BinaryPredicate __pred) { //TODO: To consider definition a kind of special factory "multiple_view" (addition to standard "single_view"). //The factory "multiple_view" would generate a range of N identical values. auto __s_rng = oneapi::dpl::experimental::ranges::views::iota(0, __count) | oneapi::dpl::experimental::ranges::views::transform([__value](auto) { return __value; }); - return __pattern_search(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __s_rng, __pred); + return __ranges::__pattern_search(__tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), + __s_rng, __pred); } template @@ -273,11 +281,11 @@ return_value(_Size __res, _Size __size, ::std::false_type) // adjacent_find //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_adjacent_find(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __predicate, - _OrFirstTag __is__or_semantic) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_adjacent_find(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, + _BinaryPredicate __predicate, _OrFirstTag __is__or_semantic) { if (__rng.size() < 2) return __rng.size(); @@ -290,21 +298,21 @@ __pattern_adjacent_find(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredic auto __rng1 = __rng | oneapi::dpl::experimental::ranges::views::take(__rng.size() - 1); auto __rng2 = __rng | oneapi::dpl::experimental::ranges::views::drop(1); - // TODO: in case of confilicting names + // TODO: in case of conflicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() auto result = oneapi::dpl::__par_backend_hetero::__parallel_find_or( - ::std::forward<_ExecutionPolicy>(__exec), _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, - _TagType{}, oneapi::dpl::__ranges::zip_view(__rng1, __rng2)); + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), + _Predicate{adjacent_find_fn<_BinaryPredicate>{__predicate}}, _TagType{}, + oneapi::dpl::__ranges::zip_view(__rng1, __rng2)); // inverted conditional because of // reorder_predicate in glue_algorithm_impl.h return return_value(result, __rng.size(), __is__or_semantic); } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_count(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __predicate) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_count(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __predicate) { if (__rng.size() == 0) return 0; @@ -320,7 +328,7 @@ __pattern_count(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __predicat return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value ::std::forward<_Range>(__rng)) .get(); @@ -330,11 +338,11 @@ __pattern_count(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __predicat // copy_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range1>> -__pattern_scan_copy(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _CreateMaskOp __create_mask_op, - _CopyByMaskOp __copy_by_mask_op) +template +oneapi::dpl::__internal::__difference_t<_Range1> +__pattern_scan_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) { if (__rng1.size() == 0) return __rng1.size(); @@ -356,7 +364,7 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng auto __res = __par_backend_hetero::__parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::zip_view( __rng1, oneapi::dpl::__ranges::all_view( __mask_buf.get_buffer())), @@ -376,11 +384,11 @@ __pattern_scan_copy(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng return __res; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_copy_if(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Predicate __pred, _Assign) +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_copy_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Predicate __pred, _Assign) { using _SizeType = decltype(__rng1.size()); using _ReduceOp = ::std::plus<_SizeType>; @@ -388,18 +396,18 @@ __pattern_copy_if(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, unseq_backend::__create_mask<_Predicate, _SizeType> __create_mask_op{__pred}; unseq_backend::__copy_by_mask<_ReduceOp, _Assign, /*inclusive*/ ::std::true_type, 1> __copy_by_mask_op; - return __pattern_scan_copy(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), __create_mask_op, __copy_by_mask_op); + return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), + __create_mask_op, __copy_by_mask_op); } //------------------------------------------------------------------------ // remove_if //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_remove_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred) { if (__rng.size() == 0) return __rng.size(); @@ -409,13 +417,14 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __rng.size()); auto __copy_rng = oneapi::dpl::__ranges::views::all(__buf.get_buffer()); - auto __copy_last_id = __pattern_copy_if(__exec, __rng, __copy_rng, __not_pred<_Predicate>{__pred}, - oneapi::dpl::__internal::__pstl_assign()); + auto __copy_last_id = __ranges::__pattern_copy_if(__tag, __exec, __rng, __copy_rng, __not_pred<_Predicate>{__pred}, + oneapi::dpl::__internal::__pstl_assign()); auto __copy_rng_truncated = __copy_rng | oneapi::dpl::experimental::ranges::views::take(__copy_last_id); - oneapi::dpl::__internal::__ranges::__pattern_walk_n(::std::forward<_ExecutionPolicy>(__exec), - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, - __copy_rng_truncated, ::std::forward<_Range>(__rng)); + oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, ::std::forward<_ExecutionPolicy>(__exec), + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}, __copy_rng_truncated, + ::std::forward<_Range>(__rng)); return __copy_last_id; } @@ -424,11 +433,11 @@ __pattern_remove_if(_ExecutionPolicy&& __exec, _Range&& __rng, _Predicate __pred // unique_copy //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_unique_copy(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _BinaryPredicate __pred, _Assign) +template +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_unique_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, + _BinaryPredicate __pred, _Assign) { using _It1DifferenceType = oneapi::dpl::__internal::__difference_t<_Range1>; unseq_backend::__copy_by_mask<::std::plus<_It1DifferenceType>, _Assign, /*inclusive*/ ::std::true_type, 1> @@ -436,18 +445,18 @@ __pattern_unique_copy(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __re __create_mask_unique_copy<__not_pred<_BinaryPredicate>, _It1DifferenceType> __create_mask_op{ __not_pred<_BinaryPredicate>{__pred}}; - return __pattern_scan_copy(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng), - ::std::forward<_Range2>(__result), __create_mask_op, __copy_by_mask_op); + return __ranges::__pattern_scan_copy(__tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range1>(__rng), ::std::forward<_Range2>(__result), + __create_mask_op, __copy_by_mask_op); } //------------------------------------------------------------------------ // unique //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_unique(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __pred) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __pred) { if (__rng.size() == 0) return __rng.size(); @@ -456,10 +465,12 @@ __pattern_unique(_ExecutionPolicy&& __exec, _Range&& __rng, _BinaryPredicate __p oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __rng.size()); auto res_rng = oneapi::dpl::__ranges::views::all(__buf.get_buffer()); - auto res = __pattern_unique_copy(__exec, __rng, res_rng, __pred, oneapi::dpl::__internal::__pstl_assign()); + auto res = __ranges::__pattern_unique_copy(__tag, __exec, __rng, res_rng, __pred, + oneapi::dpl::__internal::__pstl_assign()); - __pattern_walk_n(::std::forward<_ExecutionPolicy>(__exec), __brick_copy<_ExecutionPolicy>{}, res_rng, - ::std::forward<_Range>(__rng)); + __ranges::__pattern_walk_n(__tag, ::std::forward<_ExecutionPolicy>(__exec), + __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}, res_rng, + ::std::forward<_Range>(__rng)); return res; } @@ -477,10 +488,11 @@ class __copy2_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range3>> -__pattern_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) +template +oneapi::dpl::__internal::__difference_t<_Range3> +__pattern_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Range3&& __rng3, _Compare __comp) { auto __n1 = __rng1.size(); auto __n2 = __rng2.size(); @@ -492,22 +504,24 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _ if (__n1 == 0) { oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy1_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3)); + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}, + ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3)); } else if (__n2 == 0) { oneapi::dpl::__internal::__ranges::__pattern_walk_n( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy2_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), - oneapi::dpl::__internal::__brick_copy<_ExecutionPolicy>{}, ::std::forward<_Range1>(__rng1), - ::std::forward<_Range3>(__rng3)); + oneapi::dpl::__internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}, + ::std::forward<_Range1>(__rng1), ::std::forward<_Range3>(__rng3)); } else { - __par_backend_hetero::__parallel_merge(::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::__parallel_merge(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3), __comp) .wait(); @@ -520,12 +534,12 @@ __pattern_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _ // sort //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) +template +void +__pattern_sort(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) { if (__rng.size() >= 2) - __par_backend_hetero::__parallel_stable_sort(::std::forward<_ExecutionPolicy>(__exec), + __par_backend_hetero::__parallel_stable_sort(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __comp, __proj) .wait(); } @@ -534,10 +548,9 @@ __pattern_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj // min_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range>> -__pattern_min_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) +template +oneapi::dpl::__internal::__difference_t<_Range> +__pattern_min_element(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { //If size == 1, result is the zero-indexed element. If size == 0, result is 0. if (__rng.size() < 2) @@ -562,7 +575,7 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp auto __ret_idx = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value ::std::forward<_Range>(__rng)) .get(); @@ -575,11 +588,9 @@ __pattern_min_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp // minmax_element //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy< - _ExecutionPolicy, - ::std::pair, oneapi::dpl::__internal::__difference_t<_Range>>> -__pattern_minmax_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) +template +::std::pair, oneapi::dpl::__internal::__difference_t<_Range>> +__pattern_minmax_element(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) { //If size == 1, result is the zero-indexed element. If size == 0, result is 0. if (__rng.size() < 2) @@ -618,7 +629,7 @@ __pattern_minmax_element(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __c _ReduceValueType __ret = oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_ReduceValueType, ::std::false_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __reduce_fn, __transform_fn, unseq_backend::__no_init_value{}, // no initial value ::std::forward<_Range>(__rng)) .get(); @@ -661,12 +672,12 @@ class __assign_key2_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range3>> -__pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2&& __values, _Range3&& __out_keys, - _Range4&& __out_values, _BinaryPredicate __binary_pred, _BinaryOperator __binary_op) +template +oneapi::dpl::__internal::__difference_t<_Range3> +__pattern_reduce_by_segment(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __keys, + _Range2&& __values, _Range3&& __out_keys, _Range4&& __out_values, + _BinaryPredicate __binary_pred, _BinaryOperator __binary_op) { // The algorithm reduces values in __values where the // associated keys for the values are equal to the adjacent key. @@ -684,15 +695,16 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 if (__n == 1) { - __brick_copy<_ExecutionPolicy> __copy_range{}; + __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy> __copy_range{}; oneapi::dpl::__internal::__ranges::__pattern_walk_n( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_keys_wrapper>(__exec), - __copy_range, ::std::forward<_Range1>(__keys), ::std::forward<_Range3>(__out_keys)); + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_keys_wrapper>(__exec), __copy_range, + ::std::forward<_Range1>(__keys), ::std::forward<_Range3>(__out_keys)); oneapi::dpl::__internal::__ranges::__pattern_walk_n( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_values_wrapper> - (::std::forward<_ExecutionPolicy>(__exec)), + __tag, + oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__copy_values_wrapper>( + ::std::forward<_ExecutionPolicy>(__exec)), __copy_range, ::std::forward<_Range2>(__values), ::std::forward<_Range4>(__out_values)); return 1; @@ -730,8 +742,8 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 // evenly divisible by wg size (ensures segments are not long), or has a key not equal to the // adjacent element (marks end of real segments) // TODO: replace wgroup size with segment size based on platform specifics. - auto __intermediate_result_end = __pattern_copy_if( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__assign_key1_wrapper>(__exec), __view1, __view2, + auto __intermediate_result_end = __ranges::__pattern_copy_if( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__assign_key1_wrapper>(__exec), __view1, __view2, [__n, __binary_pred, __wgroup_size](const auto& __a) { // The size of key range for the (i-1) view is one less, so for the 0th index we do not check the keys // for (i-1), but we still need to get its key value as it is the start of a segment @@ -745,7 +757,7 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 //reduce by segment oneapi::dpl::__par_backend_hetero::__parallel_for( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__reduce1_wrapper>(__exec), + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__reduce1_wrapper>(__exec), unseq_backend::__brick_reduce_idx<_BinaryOperator, decltype(__n)>(__binary_op, __n), __intermediate_result_end, oneapi::dpl::__ranges::take_view_simple(experimental::ranges::views::all_read(__idx), __intermediate_result_end), @@ -773,8 +785,8 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 // element is copied if it is the 0th element (marks beginning of first segment), or has a key not equal to // the adjacent element (end of a segment). Artificial segments based on wg size are not created. - auto __result_end = __pattern_copy_if( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__assign_key2_wrapper>(__exec), __view3, __view4, + auto __result_end = __ranges::__pattern_copy_if( + __tag, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__assign_key2_wrapper>(__exec), __view3, __view4, [__binary_pred](const auto& __a) { // The size of key range for the (i-1) view is one less, so for the 0th index we do not check the keys // for (i-1), but we still need to get its key value as it is the start of a segment @@ -786,6 +798,7 @@ __pattern_reduce_by_segment(_ExecutionPolicy&& __exec, _Range1&& __keys, _Range2 //reduce by segment oneapi::dpl::__par_backend_hetero::__parallel_for( + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__reduce2_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), unseq_backend::__brick_reduce_idx<_BinaryOperator, decltype(__intermediate_result_end)>( diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h index de875bee324..e1e06e17e96 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/execution_sycl_defs.h @@ -18,6 +18,7 @@ #include "../../onedpl_config.h" #include "../../execution_defs.h" +#include "../../iterator_defs.h" #include "sycl_defs.h" @@ -59,24 +60,6 @@ class device_policy return q; } - // For internal use only - static constexpr ::std::true_type - __allow_unsequenced() - { - return ::std::true_type{}; - } - // __allow_vector is needed for __is_vectorization_preferred - static constexpr ::std::true_type - __allow_vector() - { - return ::std::true_type{}; - } - static constexpr ::std::true_type - __allow_parallel() - { - return ::std::true_type{}; - } - private: sycl::queue q; }; @@ -98,7 +81,7 @@ class fpga_policy : public device_policy # else __dpl_sycl::__fpga_selector() # endif // _ONEDPL_FPGA_EMU - )) + )) { } @@ -106,14 +89,6 @@ class fpga_policy : public device_policy fpga_policy(const fpga_policy& other) : base(other.queue()){}; explicit fpga_policy(sycl::queue q) : base(q) {} explicit fpga_policy(sycl::device d) : base(d) {} - - // For internal use only - - const base& - __device_policy() const - { - return static_cast(*this); - }; }; #endif // _ONEDPL_FPGA_DEVICE @@ -311,6 +286,66 @@ using __enable_if_device_execution_policy_double_no_default = __is_convertible_to_event<_Events...>, _T>; +template +struct __hetero_tag +{ + using __backend_tag = _BackendTag; +}; + +struct __device_backend_tag +{ +}; + +//---------------------------------------------------------- +// __select_backend (for the hetero policies) +//---------------------------------------------------------- + +template +__hetero_tag<__device_backend_tag> +__select_backend(const execution::device_policy<_KernelName>&, _IteratorTypes&&...) +{ + static_assert(__is_random_access_iterator_v<_IteratorTypes...>); + return {}; +} + +#if _ONEDPL_FPGA_DEVICE +struct __fpga_backend_tag : __device_backend_tag +{ +}; + +template +__hetero_tag<__fpga_backend_tag> +__select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _IteratorTypes&&...) +{ + static_assert(__is_random_access_iterator_v<_IteratorTypes...>); + return {}; +} +#endif + +//---------------------------------------------------------- +// __is_hetero_backend_tag, __is_hetero_backend_tag_v +//---------------------------------------------------------- + +template +struct __is_hetero_backend_tag : ::std::false_type +{ +}; + +template <> +struct __is_hetero_backend_tag<__device_backend_tag> : ::std::true_type +{ +}; + +#if _ONEDPL_FPGA_DEVICE +template <> +struct __is_hetero_backend_tag<__fpga_backend_tag> : ::std::true_type +{ +}; +#endif + +template +inline constexpr bool __is_hetero_backend_tag_v = __is_hetero_backend_tag<_BackendTag>::value; + } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index e0152a4006a..d60d5b3626e 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -244,10 +244,10 @@ struct __parallel_for_submitter<__internal::__optional_kernel_name<_Name...>> //General version of parallel_for, one additional parameter - __count of iterations of loop __cgh.parallel_for, //for some algorithms happens that size of processing range is n, but amount of iterations is n/2. -template = 0, typename... _Ranges> +template auto -__parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) +__parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, + _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _ForKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<_CustomName>; @@ -659,9 +659,10 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W template auto -__parallel_transform_scan_single_group(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, - ::std::size_t __n, _UnaryOperation __unary_op, _InitType __init, - _BinaryOperation __binary_op, _Inclusive) +__parallel_transform_scan_single_group(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + _InRng&& __in_rng, _OutRng&& __out_rng, ::std::size_t __n, + _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, + _Inclusive) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -735,12 +736,11 @@ __parallel_transform_scan_single_group(_ExecutionPolicy&& __exec, _InRng&& __in_ } template = 0> + typename _LocalScan, typename _GroupScan, typename _GlobalScan> auto -__parallel_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2&& __out_rng, - _BinaryOperation __binary_op, _InitType __init, _LocalScan __local_scan, - _GroupScan __group_scan, _GlobalScan __global_scan) +__parallel_transform_scan_base(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + _Range1&& __in_rng, _Range2&& __out_rng, _BinaryOperation __binary_op, _InitType __init, + _LocalScan __local_scan, _GroupScan __group_scan, _GlobalScan __global_scan) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -753,11 +753,11 @@ __parallel_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _R } template = 0> + typename _BinaryOperation, typename _Inclusive> auto -__parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, - _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) +__parallel_transform_scan(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, _UnaryOperation __unary_op, + _InitType __init, _BinaryOperation __binary_op, _Inclusive) { using _Type = typename _InitType::__value_type; @@ -779,7 +779,7 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2 if (__n <= __single_group_upper_limit && __max_slm_size >= __req_slm_size) { return __parallel_transform_scan_single_group( - std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), + __backend_tag, std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __n, __unary_op, __init, __binary_op, _Inclusive{}); } } @@ -796,7 +796,7 @@ __parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2 return __future( __parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __binary_op, __init, // local scan unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner, @@ -852,11 +852,11 @@ struct __invoke_single_group_copy_if }; template = 0> + typename _CopyByMaskOp> auto -__parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) +__parallel_scan_copy(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _CreateMaskOp __create_mask_op, + _CopyByMaskOp __copy_by_mask_op) { using _ReduceOp = ::std::plus<_Size>; using _Assigner = unseq_backend::__scan_assigner; @@ -874,7 +874,7 @@ __parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __o oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, int32_t> __mask_buf(__exec, __n); return __parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), oneapi::dpl::__ranges::make_zip_view( ::std::forward<_InRng>(__in_rng), oneapi::dpl::__ranges::all_view( @@ -892,10 +892,10 @@ __parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __o __copy_by_mask_op); } -template = 0> +template auto -__parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) +__parallel_copy_if(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) { using _SingleGroupInvoker = __invoke_single_group_copy_if<_Size>; @@ -930,8 +930,9 @@ __parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out using CopyOp = unseq_backend::__copy_by_mask<_ReduceOp, oneapi::dpl::__internal::__pstl_assign, /*inclusive*/ ::std::true_type, 1>; - return __parallel_scan_copy(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_InRng>(__in_rng), - ::std::forward<_OutRng>(__out_rng), __n, CreateOp{__pred}, CopyOp{}); + return __parallel_scan_copy(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, + CreateOp{__pred}, CopyOp{}); } } @@ -1071,12 +1072,11 @@ struct __early_exit_find_or // Base pattern for __parallel_or and __parallel_find. The execution depends on tag type _BrickTag. template -oneapi::dpl::__internal::__enable_if_device_execution_policy< - _ExecutionPolicy, - ::std::conditional_t<::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, - oneapi::dpl::__internal::__difference_t< - typename oneapi::dpl::__ranges::__get_first_range_type<_Ranges...>::type>>> -__parallel_find_or(_ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, _Ranges&&... __rngs) +::std::conditional_t< + ::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, + oneapi::dpl::__internal::__difference_t::type>> +__parallel_find_or(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Brick __f, + _BrickTag __brick_tag, _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _AtomicType = typename _BrickTag::_AtomicType; @@ -1182,9 +1182,9 @@ class __or_policy_wrapper }; template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f) +bool +__parallel_or(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); auto __buf = __keep(__first, __last); @@ -1192,6 +1192,7 @@ __parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, auto __s_buf = __s_keep(__s_first, __s_last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag, __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, __parallel_or_tag{}, __buf.all_view(), __s_buf.all_view()); } @@ -1200,13 +1201,15 @@ __parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, // TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of // backend code. template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f) +bool +__parallel_or(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, _Iterator __first, + _Iterator __last, _Brick __f) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf = __keep(__first, __last); return oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag, __par_backend_hetero::make_wrapped_policy<__or_policy_wrapper>(::std::forward<_ExecutionPolicy>(__exec)), __f, __parallel_or_tag{}, __buf.all_view()); } @@ -1221,9 +1224,9 @@ class __find_policy_wrapper }; template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, _Iterator1> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f, _IsFirst) +_Iterator1 +__parallel_find(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, _Iterator2 __s_last, _Brick __f, _IsFirst) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator1>(); auto __buf = __keep(__first, __last); @@ -1233,6 +1236,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, __parallel_find_backward_tag>; return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag, __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, _TagType{}, __buf.all_view(), __s_buf.all_view()); @@ -1242,8 +1246,9 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last // TODO: check if similar pattern may apply to other algorithms. If so, these overloads should be moved out of // backend code. template -oneapi::dpl::__internal::__enable_if_device_execution_policy<_ExecutionPolicy, _Iterator> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) +_Iterator +__parallel_find(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator __first, _Iterator __last, _Brick __f, _IsFirst) { auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf = __keep(__first, __last); @@ -1251,6 +1256,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, using _TagType = ::std::conditional_t<_IsFirst::value, __parallel_find_forward_tag, __parallel_find_backward_tag>; return __first + oneapi::dpl::__par_backend_hetero::__parallel_find_or( + __backend_tag, __par_backend_hetero::make_wrapped_policy<__find_policy_wrapper>( ::std::forward<_ExecutionPolicy>(__exec)), __f, _TagType{}, __buf.all_view()); @@ -1482,10 +1488,10 @@ struct __parallel_merge_submitter<_IdType, __internal::__optional_kernel_name<_N template class __merge_kernel_name; -template = 0> +template auto -__parallel_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) +__parallel_merge(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, + _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -1493,19 +1499,20 @@ __parallel_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, if (__n <= std::numeric_limits<::std::uint32_t>::max()) { using _wi_index_type = ::std::uint32_t; - using _MergeKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__merge_kernel_name<_CustomName, _wi_index_type>>; - return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()(::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3), __comp); - + using _MergeKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider< + __merge_kernel_name<_CustomName, _wi_index_type>>; + return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()( + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), + ::std::forward<_Range3>(__rng3), __comp); } else { using _wi_index_type = ::std::uint64_t; - using _MergeKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__merge_kernel_name<_CustomName, _wi_index_type>>; - return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()(::std::forward<_ExecutionPolicy>(__exec), - ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3), __comp); + using _MergeKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider< + __merge_kernel_name<_CustomName, _wi_index_type>>; + return __parallel_merge_submitter<_wi_index_type, _MergeKernel>()( + ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), + ::std::forward<_Range3>(__rng3), __comp); } } @@ -1544,9 +1551,9 @@ struct __parallel_sort_submitter<_IdType, __internal::__optional_kernel_name<_Le __internal::__optional_kernel_name<_GlobalSortName...>, __internal::__optional_kernel_name<_CopyBackName...>> { - template + template auto - operator()(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) const + operator()(_BackendTag, _ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) const { using _Tp = oneapi::dpl::__internal::__value_t<_Range>; using _Size = oneapi::dpl::__internal::__difference_t<_Range>; @@ -1636,10 +1643,10 @@ struct __parallel_sort_submitter<_IdType, __internal::__optional_kernel_name<_Le } }; -template = 0> +template auto -__parallel_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) +__parallel_sort_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, + _Compare __comp) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -1654,7 +1661,8 @@ __parallel_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) using _CopyBackKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__sort_copy_back_kernel<_CustomName, _wi_index_type>>; return __parallel_sort_submitter<_wi_index_type, _LeafSortKernel, _GlobalSortKernel, _CopyBackKernel>()( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __comp); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range>(__rng), __comp); } else { @@ -1666,7 +1674,8 @@ __parallel_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp) using _CopyBackKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__sort_copy_back_kernel<_CustomName, _wi_index_type>>; return __parallel_sort_submitter<_wi_index_type, _LeafSortKernel, _GlobalSortKernel, _CopyBackKernel>()( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __comp); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range>(__rng), __comp); } } @@ -1678,9 +1687,9 @@ template struct __parallel_partial_sort_submitter<__internal::__optional_kernel_name<_GlobalSortName...>, __internal::__optional_kernel_name<_CopyBackName...>> { - template + template auto - operator()(_ExecutionPolicy&& __exec, _Range&& __rng, _Merge __merge, _Compare __comp) const + operator()(_BackendTag, _ExecutionPolicy&& __exec, _Range&& __rng, _Merge __merge, _Compare __comp) const { using _Tp = oneapi::dpl::__internal::__value_t<_Range>; using _Size = oneapi::dpl::__internal::__difference_t<_Range>; @@ -1743,10 +1752,10 @@ struct __parallel_partial_sort_submitter<__internal::__optional_kernel_name<_Glo } }; -template = 0> +template auto -__parallel_partial_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Merge __merge, _Compare __comp) +__parallel_partial_sort_impl(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __rng, + _Merge __merge, _Compare __comp) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _GlobalSortKernel = @@ -1755,7 +1764,8 @@ __parallel_partial_sort_impl(_ExecutionPolicy&& __exec, _Range&& __rng, _Merge _ oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_provider<__sort_copy_back_kernel<_CustomName>>; return __parallel_partial_sort_submitter<_GlobalSortKernel, _CopyBackKernel>()( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __merge, __comp); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range>(__rng), __merge, __comp); } //------------------------------------------------------------------------ @@ -1776,31 +1786,32 @@ struct __is_radix_sort_usable_for_type }; #if _USE_RADIX_SORT -template > && - __is_radix_sort_usable_for_type, _Compare>::value, - int> = 0> +template < + typename _ExecutionPolicy, typename _Range, typename _Compare, typename _Proj, + ::std::enable_if_t< + __is_radix_sort_usable_for_type, _Compare>::value, int> = 0> auto -__parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare, _Proj __proj) +__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range&& __rng, _Compare, _Proj __proj) { return __parallel_radix_sort<__internal::__is_comp_ascending<::std::decay_t<_Compare>>::value>( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __proj); } #endif -template > && - !__is_radix_sort_usable_for_type, _Compare>::value, - int> = 0> +template < + typename _ExecutionPolicy, typename _Range, typename _Compare, typename _Proj, + ::std::enable_if_t< + !__is_radix_sort_usable_for_type, _Compare>::value, int> = 0> auto -__parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) +__parallel_stable_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Range&& __rng, _Compare __comp, _Proj __proj) { auto __cmp_f = [__comp, __proj](const auto& __a, const auto& __b) mutable { return __comp(__proj(__a), __proj(__b)); }; - return __parallel_sort_impl(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), __cmp_f); + return __parallel_sort_impl(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range>(__rng), + __cmp_f); } //------------------------------------------------------------------------ @@ -1810,21 +1821,19 @@ __parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __com // TODO: check if it makes sense to move these wrappers out of backend to a common place // TODO: consider changing __partial_merge_kernel to make it compatible with // __full_merge_kernel in order to use __parallel_sort_impl routine -template = 0> +template auto -__parallel_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, _Iterator __last, - _Compare __comp) +__parallel_partial_sort(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _Iterator __first, _Iterator __mid, _Iterator __last, _Compare __comp) { const auto __mid_idx = __mid - __first; auto __keep = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read_write, _Iterator>(); auto __buf = __keep(__first, __last); - return __parallel_partial_sort_impl(::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), + return __parallel_partial_sort_impl(__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __buf.all_view(), __partial_merge_kernel{__mid_idx}, __comp); } - } // namespace __par_backend_hetero } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index b8410261c60..95d23fc16e9 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -77,10 +77,10 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name... } }; -template = 0> +template auto -__parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) +__parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, + _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using __parallel_for_name = __internal::__kernel_name_provider<_CustomName>; @@ -89,201 +89,23 @@ __parallel_for(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&& __count, std::forward<_Ranges>(__rngs)...); } -//------------------------------------------------------------------------ -// parallel_transform_reduce -//------------------------------------------------------------------------ - -template = 0, - typename... _Ranges> -auto -__parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, _Ranges&&... __rngs) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_Tp, _Commutative>( - __exec.__device_policy(), __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); -} - -//------------------------------------------------------------------------ -// parallel_transform_scan -//------------------------------------------------------------------------ - -template = 0> -auto -__parallel_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2&& __out_rng, ::std::size_t __n, - _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( - __exec.__device_policy(), ::std::forward<_Range1>(__in_rng), ::std::forward<_Range2>(__out_rng), __n, - __unary_op, __init, __binary_op, _Inclusive{}); -} - -template = 0> -auto -__parallel_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, - _BinaryOperation __binary_op, _InitType __init, _LocalScan __local_scan, - _GroupScan __group_scan, _GlobalScan __global_scan) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_transform_scan_base( - __exec.__device_policy(), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), __binary_op, __init, - __local_scan, __group_scan, __global_scan); -} - -template = 0> -auto -__parallel_copy_if(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, _Pred __pred) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_copy_if( - __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, __pred); -} - -template = 0> -auto -__parallel_scan_copy(_ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _Size __n, - _CreateMaskOp __create_mask_op, _CopyByMaskOp __copy_by_mask_op) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_scan_copy( - __exec.__device_policy(), ::std::forward<_InRng>(__in_rng), ::std::forward<_OutRng>(__out_rng), __n, - __create_mask_op, __copy_by_mask_op); -} - -//------------------------------------------------------------------------ -// __parallel_find_or -//----------------------------------------------------------------------- -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy< - _ExecutionPolicy, - ::std::conditional_t<::std::is_same_v<_BrickTag, __parallel_or_tag>, bool, - oneapi::dpl::__internal::__difference_t< - typename oneapi::dpl::__ranges::__get_first_range_type<_Ranges...>::type>>> -__parallel_find_or(_ExecutionPolicy&& __exec, _Brick __f, _BrickTag __brick_tag, _Ranges&&... __rngs) -{ - return oneapi::dpl::__par_backend_hetero::__parallel_find_or(__exec.__device_policy(), __f, __brick_tag, - ::std::forward<_Ranges>(__rngs)...); -} - -//------------------------------------------------------------------------ -// parallel_or -//----------------------------------------------------------------------- -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_or(__exec.__device_policy(), __first, __last, __s_first, - __s_last, __f); -} - -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, bool> -__parallel_or(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_or(__exec.__device_policy(), __first, __last, __f); -} - -//------------------------------------------------------------------------ -// parallel_find -//----------------------------------------------------------------------- - -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, _Iterator1> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __s_first, - _Iterator2 __s_last, _Brick __f, _IsFirst __is_first) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_find(__exec.__device_policy(), __first, __last, __s_first, - __s_last, __f, __is_first); -} - -template -oneapi::dpl::__internal::__enable_if_fpga_execution_policy<_ExecutionPolicy, _Iterator> -__parallel_find(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __last, _Brick __f, _IsFirst __is_first) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_find(__exec.__device_policy(), __first, __last, __f, - __is_first); -} - -//------------------------------------------------------------------------ -// parallel_merge -//----------------------------------------------------------------------- - -template -auto -__parallel_merge(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) - -> oneapi::dpl::__internal::__enable_if_fpga_execution_policy< - _ExecutionPolicy, decltype(oneapi::dpl::__par_backend_hetero::__parallel_merge( - __exec.__device_policy(), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), ::std::forward<_Range3>(__rng3), __comp))> -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_merge( - __exec.__device_policy(), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - ::std::forward<_Range3>(__rng3), __comp); -} - -//------------------------------------------------------------------------ -// parallel_stable_sort -//----------------------------------------------------------------------- - -template = 0> -auto -__parallel_stable_sort(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _Proj __proj) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_stable_sort(__exec.__device_policy(), - ::std::forward<_Range>(__rng), __comp, __proj); -} - -//------------------------------------------------------------------------ -// parallel_partial_sort -//----------------------------------------------------------------------- - -// TODO: check if it makes sense to move these wrappers out of backend to a common place -template = 0> -auto -__parallel_partial_sort(_ExecutionPolicy&& __exec, _Iterator __first, _Iterator __mid, _Iterator __last, - _Compare __comp) -{ - // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_partial_sort(__exec.__device_policy(), __first, __mid, __last, - __comp); -} - //------------------------------------------------------------------------ // parallel_histogram //----------------------------------------------------------------------- // TODO: check if it makes sense to move these wrappers out of backend to a common place -template = 0> +template auto -__parallel_histogram(_ExecutionPolicy&& __exec, const _Event& __init_event, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) +__parallel_histogram(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, const _Event& __init_event, + _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { static_assert(sizeof(oneapi::dpl::__internal::__value_t<_Range2>) <= sizeof(::std::uint32_t), "histogram is not supported on FPGA devices with output types greater than 32 bits"); // workaround until we implement more performant version for patterns - return oneapi::dpl::__par_backend_hetero::__parallel_histogram(__exec.__device_policy(), __init_event, - ::std::forward<_Range1>(__input), - ::std::forward<_Range2>(__bins), __binhash_manager); + return oneapi::dpl::__par_backend_hetero::__parallel_histogram( + oneapi::dpl::__internal::__device_backend_tag{}, __exec, __init_event, ::std::forward<_Range1>(__input), + ::std::forward<_Range2>(__bins), __binhash_manager); } } // namespace __par_backend_hetero diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index fdcf06ad984..e7ac2ba50e1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -285,9 +285,9 @@ struct __histogram_general_registers_local_reduction_submitter<__iters_per_work_ template <::std::uint16_t __iters_per_work_item, ::std::uint8_t __bins_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> auto -__histogram_general_registers_local_reduction(_ExecutionPolicy&& __exec, const sycl::event& __init_event, - ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) +__histogram_general_registers_local_reduction(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, ::std::uint16_t __work_group_size, + _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name; @@ -380,9 +380,9 @@ struct __histogram_general_local_atomics_submitter<__iters_per_work_item, template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> auto -__histogram_general_local_atomics(_ExecutionPolicy&& __exec, const sycl::event& __init_event, - ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) +__histogram_general_local_atomics(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, ::std::uint16_t __work_group_size, _Range1&& __input, + _Range2&& __bins, const _BinHashMgr& __binhash_manager) { using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name; @@ -405,11 +405,11 @@ struct __histogram_general_private_global_atomics_submitter; template struct __histogram_general_private_global_atomics_submitter<__internal::__optional_kernel_name<_KernelName...>> { - template + template auto - operator()(_ExecutionPolicy&& __exec, const sycl::event& __init_event, ::std::uint16_t __min_iters_per_work_item, - ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, - const _BinHashMgr& __binhash_manager) + operator()(_BackendTag, _ExecutionPolicy&& __exec, const sycl::event& __init_event, + ::std::uint16_t __min_iters_per_work_item, ::std::uint16_t __work_group_size, _Range1&& __input, + _Range2&& __bins, const _BinHashMgr& __binhash_manager) { const ::std::size_t __n = __input.size(); const ::std::size_t __num_bins = __bins.size(); @@ -477,9 +477,10 @@ struct __histogram_general_private_global_atomics_submitter<__internal::__option }; template auto -__histogram_general_private_global_atomics(_ExecutionPolicy&& __exec, const sycl::event& __init_event, - ::std::uint16_t __min_iters_per_work_item, ::std::uint16_t __work_group_size, - _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) +__histogram_general_private_global_atomics(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, ::std::uint16_t __min_iters_per_work_item, + ::std::uint16_t __work_group_size, _Range1&& __input, _Range2&& __bins, + const _BinHashMgr& __binhash_manager) { using _kernel_base_name = typename ::std::decay_t<_ExecutionPolicy>::kernel_name; @@ -487,14 +488,16 @@ __histogram_general_private_global_atomics(_ExecutionPolicy&& __exec, const sycl __histo_kernel_private_glocal_atomics<_kernel_base_name>>; return __histogram_general_private_global_atomics_submitter<_global_atomics_name>()( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __min_iters_per_work_item, __work_group_size, - ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); + oneapi::dpl::__internal::__device_backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __min_iters_per_work_item, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), + __binhash_manager); } template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> auto -__parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, +__parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag __backend_tag, + _ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { using _private_histogram_type = ::std::uint16_t; @@ -513,7 +516,7 @@ __parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& { return __future( __histogram_general_registers_local_reduction<__iters_per_work_item, __max_work_item_private_bins>( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } // if bins fit into SLM, use local atomics @@ -522,8 +525,8 @@ __parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& __local_mem_size) { return __future(__histogram_general_local_atomics<__iters_per_work_item>( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, ::std::forward<_Range1>(__input), - ::std::forward<_Range2>(__bins), __binhash_manager)); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size, + ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } else // otherwise, use global atomics (private copies per workgroup) { @@ -533,26 +536,27 @@ __parallel_histogram_select_kernel(_ExecutionPolicy&& __exec, const sycl::event& // private copies of the histogram bins in global memory. No unrolling is taken advantage of here because it // is a runtime argument. return __future(__histogram_general_private_global_atomics( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, __iters_per_work_item, __work_group_size, - ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __iters_per_work_item, + __work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager)); } } template auto -__parallel_histogram(_ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, +__parallel_histogram(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) { if (__input.size() < 1048576) // 2^20 { return __parallel_histogram_select_kernel( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } else { return __parallel_histogram_select_kernel( - ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager); } } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index 04fa8d3f124..4fd1aa1bef9 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -757,7 +757,8 @@ struct __parallel_radix_sort_iteration //----------------------------------------------------------------------- template auto -__parallel_radix_sort(_ExecutionPolicy&& __exec, _Range&& __in_rng, _Proj __proj) +__parallel_radix_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __in_rng, + _Proj __proj) { const ::std::size_t __n = __in_rng.size(); assert(__n > 1); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index c856083041e..cd474afbf1f 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -106,11 +106,10 @@ struct __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __ite __internal::__optional_kernel_name<_Name...>> { template = 0, typename... _Ranges> auto - operator()(_ExecutionPolicy&& __exec, const _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, _Ranges&&... __rngs) const + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, const _Size __n, + _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) const { auto __transform_pattern = unseq_backend::transform_reduce<_ExecutionPolicy, __iters_per_work_item, _ReduceOp, _TransformOp, @@ -140,9 +139,10 @@ struct __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __ite template = 0, typename... _Ranges> + typename... _Ranges> auto -__parallel_transform_reduce_small_impl(_ExecutionPolicy&& __exec, const _Size __n, _ReduceOp __reduce_op, +__parallel_transform_reduce_small_impl(oneapi::dpl::__internal::__device_backend_tag __backend_tag, + _ExecutionPolicy&& __exec, const _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -150,9 +150,9 @@ __parallel_transform_reduce_small_impl(_ExecutionPolicy&& __exec, const _Size __ __reduce_small_kernel<::std::integral_constant<::std::uint8_t, __iters_per_work_item>, _CustomName>>; return __parallel_transform_reduce_small_submitter<_Tp, __work_group_size, __iters_per_work_item, _Commutative, - _ReduceKernel>()(::std::forward<_ExecutionPolicy>(__exec), __n, - __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + _ReduceKernel>()( + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + ::std::forward<_Ranges>(__rngs)...); } // Submits the first kernel of the parallel_transform_reduce for mid-sized arrays. @@ -168,11 +168,11 @@ struct __parallel_transform_reduce_device_kernel_submitter<_Tp, __work_group_siz __internal::__optional_kernel_name<_KernelName...>> { template = 0, typename... _Ranges> auto - operator()(_ExecutionPolicy&& __exec, _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, sycl::buffer<_Tp>& __temp, _Ranges&&... __rngs) const + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Size __n, + _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, sycl::buffer<_Tp>& __temp, + _Ranges&&... __rngs) const { auto __transform_pattern = unseq_backend::transform_reduce<_ExecutionPolicy, __iters_per_work_item, _ReduceOp, _TransformOp, @@ -209,11 +209,11 @@ template > { - template = 0> + template auto - operator()(_ExecutionPolicy&& __exec, sycl::event& __reduce_event, _Size __n, _ReduceOp __reduce_op, - _TransformOp __transform_op, _InitType __init, sycl::buffer<_Tp>& __temp) const + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, sycl::event& __reduce_event, + _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, + sycl::buffer<_Tp>& __temp) const { using _NoOpFunctor = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; auto __transform_pattern = @@ -259,10 +259,10 @@ struct __parallel_transform_reduce_work_group_kernel_submitter< template = 0, typename... _Ranges> + typename _Size, typename _ReduceOp, typename _TransformOp, typename _InitType, typename... _Ranges> auto -__parallel_transform_reduce_mid_impl(_ExecutionPolicy&& __exec, _Size __n, _ReduceOp __reduce_op, +__parallel_transform_reduce_mid_impl(oneapi::dpl::__internal::__device_backend_tag __backend_tag, + _ExecutionPolicy&& __exec, _Size __n, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; @@ -283,12 +283,14 @@ __parallel_transform_reduce_mid_impl(_ExecutionPolicy&& __exec, _Size __n, _Redu sycl::event __reduce_event = __parallel_transform_reduce_device_kernel_submitter<_Tp, __work_group_size, __iters_per_work_item_device_kernel, _Commutative, _ReduceDeviceKernel>()( - __exec, __n, __reduce_op, __transform_op, __init, __temp, ::std::forward<_Ranges>(__rngs)...); + __backend_tag, __exec, __n, __reduce_op, __transform_op, __init, __temp, + ::std::forward<_Ranges>(__rngs)...); __n = __n_groups; // Number of preliminary results from the device kernel. return __parallel_transform_reduce_work_group_kernel_submitter< _Tp, __work_group_size, __iters_per_work_item_work_group_kernel, _Commutative, _ReduceWorkGroupKernel>()( - ::std::forward<_ExecutionPolicy>(__exec), __reduce_event, __n, __reduce_op, __transform_op, __init, __temp); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __reduce_event, __n, __reduce_op, __transform_op, + __init, __temp); } // General implementation using a tree reduction @@ -296,11 +298,11 @@ template = 0, typename... _Ranges> static auto - submit(_ExecutionPolicy&& __exec, _Size __n, ::std::uint16_t __work_group_size, _ReduceOp __reduce_op, - _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) + submit(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Size __n, + ::std::uint16_t __work_group_size, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, + _Ranges&&... __rngs) { using _CustomName = oneapi::dpl::__internal::__policy_kernel_name<_ExecutionPolicy>; using _NoOpFunctor = unseq_backend::walk_n<_ExecutionPolicy, oneapi::dpl::__internal::__no_op>; @@ -417,11 +419,10 @@ struct __parallel_transform_reduce_impl // Big arrays are processed with a recursive tree reduction. __work_group_size * __iters_per_work_item elements are // reduced in each step. template = 0, - typename... _Ranges> + typename _InitType, typename... _Ranges> auto -__parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _TransformOp __transform_op, - _InitType __init, _Ranges&&... __rngs) +__parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, + _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) { auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); assert(__n > 0); @@ -437,37 +438,37 @@ __parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _T if (__n <= 256) { return __parallel_transform_reduce_small_impl<_Tp, 256, 1, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 512) { return __parallel_transform_reduce_small_impl<_Tp, 256, 2, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 1024) { return __parallel_transform_reduce_small_impl<_Tp, 256, 4, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 2048) { return __parallel_transform_reduce_small_impl<_Tp, 256, 8, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4096) { return __parallel_transform_reduce_small_impl<_Tp, 256, 16, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8192) { return __parallel_transform_reduce_small_impl<_Tp, 256, 32, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } @@ -477,44 +478,44 @@ __parallel_transform_reduce(_ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _T else if (__n <= 2097152) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 1, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 4194304) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 2, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 8388608) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 4, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 16777216) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 8, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 33554432) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 16, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } else if (__n <= 67108864) { return __parallel_transform_reduce_mid_impl<_Tp, 256, 32, 32, _Commutative>( - ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __reduce_op, __transform_op, __init, ::std::forward<_Ranges>(__rngs)...); } } // Otherwise use a recursive tree reduction. return __parallel_transform_reduce_impl<_Tp, 32, _Commutative>::submit( - ::std::forward<_ExecutionPolicy>(__exec), __n, __work_group_size, __reduce_op, __transform_op, __init, - ::std::forward<_Ranges>(__rngs)...); + __backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __n, __work_group_size, __reduce_op, __transform_op, + __init, ::std::forward<_Ranges>(__rngs)...); } } // namespace __par_backend_hetero diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h index c0c0ee40946..bbfd53662ad 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h @@ -386,8 +386,6 @@ class __buffer_impl __container_t __container; public: - static_assert(::std::is_same_v<_ExecutionPolicy, ::std::decay_t<_ExecutionPolicy>>); - __buffer_impl(_ExecutionPolicy /*__exec*/, ::std::size_t __n_elements) : __container{sycl::range<1>(__n_elements)} { } diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h index ea0bbcc0010..1821301e911 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h @@ -716,6 +716,32 @@ struct __get_sycl_range } }; +//---------------------------------------------------------- +// __select_backend (for the hetero policies) +//---------------------------------------------------------- + +//TODO required correct implementation of this __ranges::__select_backend() +// 1. There is still not RA ranges checks +// 2. Obviously, a return tag is not necessarily oneapi::dpl::__internal::__hetero_tag +template +oneapi::dpl::__internal::__hetero_tag +__select_backend(const execution::device_policy<_KernelName>&, _Ranges&&...) +{ + return {}; +} + +#if _ONEDPL_FPGA_DEVICE +//TODO required correct implementation of this __ranges::__select_backend() +// 1. There is still not RA ranges checks +// 2. Obviously, a return tag is not necessarily oneapi::dpl::__internal::__hetero_tag +template +oneapi::dpl::__internal::__hetero_tag +__select_backend(const execution::fpga_policy<_Factor, _KernelName>&, _Ranges&&...) +{ + return {}; +} +#endif + } // namespace __ranges } // namespace dpl } // namespace oneapi diff --git a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h index 27179622b6b..87d22e9a0a7 100644 --- a/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/histogram_impl_hetero.h @@ -117,11 +117,12 @@ struct __hist_fill_zeros_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy> -__pattern_histogram(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _Size __num_bins, _BinHash&& __func, _RandomAccessIterator2 __histogram_first) +template +void +__pattern_histogram(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _Size __num_bins, _BinHash&& __func, + _RandomAccessIterator2 __histogram_first) { //If there are no histogram bins there is nothing to do if (__num_bins > 0) @@ -143,7 +144,7 @@ __pattern_histogram(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _ //fill histogram bins with zeros auto __init_event = oneapi::dpl::__par_backend_hetero::__parallel_for( - oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__hist_fill_zeros_wrapper>(__exec), + _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__hist_fill_zeros_wrapper>(__exec), unseq_backend::walk_n<_ExecutionPolicy, decltype(__fill_func)>{__fill_func}, __num_bins, __bins); if (__n > 0) @@ -156,8 +157,8 @@ __pattern_histogram(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _ _RandomAccessIterator1>(); auto __input_buf = __keep_input(__first, __last); - __parallel_histogram(::std::forward<_ExecutionPolicy>(__exec), __init_event, __input_buf.all_view(), - ::std::move(__bins), __binhash_manager) + __parallel_histogram(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __init_event, + __input_buf.all_view(), ::std::move(__bins), __binhash_manager) .wait(); } else diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 60c1001f5b8..23beae41c10 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -37,12 +37,12 @@ namespace __internal // transform_reduce (version with two binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, - _BinaryOperation2 __binary_op2, /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, + _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { if (__first1 == __last1) return __init; @@ -60,7 +60,7 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf1.all_view(), __buf2.all_view()) .get(); @@ -70,12 +70,12 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __f // transform_reduce (with unary and binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, - _BinaryOperation __binary_op, _UnaryOperation __unary_op, /*vector=*/::std::true_type, - /*parallel=*/::std::true_type) +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _ForwardIterator __first, + _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) { if (__first == __last) return __init; @@ -88,7 +88,7 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value __buf.all_view()) .get(); @@ -122,11 +122,12 @@ __iterators_possibly_equal(const sycl_iterator<_Mode1, _T, _Allocator>& __it1, } #endif // _ONEDPL_BACKEND_SYCL -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) +template +_Iterator2 +__pattern_transform_scan_base(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _InitType __init, + _BinaryOperation __binary_op, _Inclusive) { if (__first == __last) return __result; @@ -143,9 +144,9 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _It auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _Iterator2>(); auto __buf2 = __keep2(__result, __result + __n); - oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(::std::forward<_ExecutionPolicy>(__exec), - __buf1.all_view(), __buf2.all_view(), __n, - __unary_op, __init, __binary_op, _Inclusive{}) + oneapi::dpl::__par_backend_hetero::__parallel_transform_scan( + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, + __unary_op, __init, __binary_op, _Inclusive{}) .wait(); } else @@ -168,48 +169,48 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Iterator1 __first, _It auto __buf2 = __keep2(__first_tmp, __last_tmp); // Run main algorithm and save data into temporary buffer - oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(__policy, __buf1.all_view(), __buf2.all_view(), - __n, __unary_op, __init, __binary_op, _Inclusive{}) + oneapi::dpl::__par_backend_hetero::__parallel_transform_scan(_BackendTag{}, __policy, __buf1.all_view(), + __buf2.all_view(), __n, __unary_op, __init, + __binary_op, _Inclusive{}) .wait(); // Move data from temporary buffer into results - oneapi::dpl::__internal::__pattern_walk2_brick(::std::move(__policy), __first_tmp, __last_tmp, __result, - oneapi::dpl::__internal::__brick_move<_NewExecutionPolicy>{}, - ::std::true_type{}); + oneapi::dpl::__internal::__pattern_walk2_brick( + __tag, ::std::move(__policy), __first_tmp, __last_tmp, __result, + oneapi::dpl::__internal::__brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer } return __result + __n; } - -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator2 +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, _Type __init, + _BinaryOperation __binary_op, _Inclusive) { using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + return __pattern_transform_scan_base(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, _InitType{__init}, __binary_op, _Inclusive{}); } // scan without initial element -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Iterator2> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Iterator1 __first, _Iterator1 __last, _Iterator2 __result, - _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive, - /*vector=*/::std::true_type, /*parallel=*/::std::true_type) +template +_Iterator2 +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Iterator1 __first, + _Iterator1 __last, _Iterator2 __result, _UnaryOperation __unary_op, + _BinaryOperation __binary_op, _Inclusive) { using _Type = typename ::std::iterator_traits<_Iterator1>::value_type; using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__no_init_value<_RepackedType>; - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, + return __pattern_transform_scan_base(__tag, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __unary_op, _InitType{}, __binary_op, _Inclusive{}); } @@ -223,11 +224,11 @@ struct adjacent_difference_wrapper { }; -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _ForwardIterator2> -__pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, - _ForwardIterator2 __d_first, _BinaryOperation __op, /*vector*/ ::std::true_type, - /*parallel*/ ::std::true_type) +template +_ForwardIterator2 +__pattern_adjacent_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first, + _ForwardIterator1 __last, _ForwardIterator2 __d_first, _BinaryOperation __op) { auto __n = __last - __first; if (__n <= 0) @@ -242,13 +243,12 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __fir // if we have the only element, just copy it according to the specification if (__n == 1) { - return __internal::__except_handler([&__exec, __first, __last, __d_first, __d_last, &__op]() { + return __internal::__except_handler([__tag, &__exec, __first, __last, __d_first, __d_last, &__op]() { auto __wrapped_policy = __par_backend_hetero::make_wrapped_policy( ::std::forward<_ExecutionPolicy>(__exec)); - __internal::__pattern_walk2_brick(__wrapped_policy, __first, __last, __d_first, - __internal::__brick_copy{}, - ::std::true_type{}); + __internal::__pattern_walk2_brick(__tag, __wrapped_policy, __first, __last, __d_first, + __internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); return __d_last; }); @@ -270,8 +270,8 @@ __pattern_adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __fir using _Function = unseq_backend::walk_adjacent_difference<_ExecutionPolicy, decltype(__fn)>; - oneapi::dpl::__par_backend_hetero::__parallel_for(__exec, _Function{__fn}, __n, __buf1.all_view(), - __buf2.all_view()) + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, __exec, _Function{__fn}, __n, + __buf1.all_view(), __buf2.all_view()) .wait(); return __d_last; diff --git a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h index 0b104a5ff4b..969b05ab914 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_ranges_impl_hetero.h @@ -37,11 +37,11 @@ namespace __ranges // transform_reduce (version with two binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Tp __init, - _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { if (__rng1.empty()) return __init; @@ -51,7 +51,7 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2& return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op1, _Functor{__binary_op2}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2)) .get(); @@ -61,10 +61,11 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2& // transform_reduce (with unary and binary functions) //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init, _BinaryOperation __binary_op, - _UnaryOperation __unary_op) +template +_Tp +__pattern_transform_reduce(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init, + _BinaryOperation __binary_op, _UnaryOperation __unary_op) { if (__rng.empty()) return __init; @@ -74,7 +75,7 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init return oneapi::dpl::__par_backend_hetero::__parallel_transform_reduce<_RepackedTp, ::std::true_type /*is_commutative*/>( - ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __binary_op, _Functor{__unary_op}, unseq_backend::__init_value<_RepackedTp>{__init}, // initial value ::std::forward<_Range>(__rng)) .get(); @@ -84,12 +85,11 @@ __pattern_transform_reduce(_ExecutionPolicy&& __exec, _Range&& __rng, _Tp __init // transform_scan //------------------------------------------------------------------------ -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _UnaryOperation __unary_op, - _InitType __init, _BinaryOperation __binary_op, _Inclusive) +template +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_transform_scan_base(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, _Inclusive) { if (__rng1.empty()) return 0; @@ -106,8 +106,8 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Rang _NoOpFunctor __get_data_op; oneapi::dpl::__par_backend_hetero::__parallel_transform_scan_base( - ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), - __binary_op, __init, + _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), + ::std::forward<_Range2>(__rng2), __binary_op, __init, // local scan unseq_backend::__scan<_Inclusive, _ExecutionPolicy, _BinaryOperation, _UnaryFunctor, _Assigner, _Assigner, _NoOpFunctor, _InitType>{__binary_op, _UnaryFunctor{__unary_op}, __assign_op, __assign_op, @@ -122,36 +122,34 @@ __pattern_transform_scan_base(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Rang return __rng1_size; } -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _UnaryOperation __unary_op, - _Type __init, _BinaryOperation __binary_op, _Inclusive) +template +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _UnaryOperation __unary_op, _Type __init, _BinaryOperation __binary_op, _Inclusive) { using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__init_value<_RepackedType>; - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), __unary_op, _InitType{__init}, __binary_op, - _Inclusive{}); + return __pattern_transform_scan_base(__tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), __unary_op, + _InitType{__init}, __binary_op, _Inclusive{}); } // scan without initial element -template -oneapi::dpl::__internal::__enable_if_hetero_execution_policy<_ExecutionPolicy, - oneapi::dpl::__internal::__difference_t<_Range2>> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _UnaryOperation __unary_op, - _BinaryOperation __binary_op, _Inclusive) +oneapi::dpl::__internal::__difference_t<_Range2> +__pattern_transform_scan(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, + _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive) { using _Type = oneapi::dpl::__internal::__value_t<_Range1>; using _RepackedType = __par_backend_hetero::__repacked_tuple_t<_Type>; using _InitType = unseq_backend::__no_init_value<_RepackedType>; - return __pattern_transform_scan_base(::std::forward<_ExecutionPolicy>(__exec), ::std::forward<_Range1>(__rng1), - ::std::forward<_Range2>(__rng2), __unary_op, _InitType{}, __binary_op, - _Inclusive{}); + return __pattern_transform_scan_base(__tag, ::std::forward<_ExecutionPolicy>(__exec), + ::std::forward<_Range1>(__rng1), ::std::forward<_Range2>(__rng2), __unary_op, + _InitType{}, __binary_op, _Inclusive{}); } } // namespace __ranges diff --git a/include/oneapi/dpl/pstl/histogram_impl.h b/include/oneapi/dpl/pstl/histogram_impl.h index 4c8f5204793..362685bd19c 100644 --- a/include/oneapi/dpl/pstl/histogram_impl.h +++ b/include/oneapi/dpl/pstl/histogram_impl.h @@ -32,12 +32,14 @@ namespace dpl namespace __internal { -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy> -__pattern_histogram(_ExecutionPolicy&& exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, +void +__pattern_histogram(_Tag, _ExecutionPolicy&& exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _Size __num_bins, _IdxHashFunc __func, _RandomAccessIterator2 __histogram_first) { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + static_assert(sizeof(_Size) == 0 /*false*/, "Histogram API is currently unsupported for policies other than device execution policies"); } @@ -50,8 +52,10 @@ oneapi::dpl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomA histogram(_ExecutionPolicy&& exec, _RandomAccessIterator1 first, _RandomAccessIterator1 last, _Size num_bins, _ValueType first_bin_min_val, _ValueType last_bin_max_val, _RandomAccessIterator2 histogram_first) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(exec, first, histogram_first); + oneapi::dpl::__internal::__pattern_histogram( - ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, oneapi::dpl::__internal::__evenly_divided_binhash<_ValueType>(first_bin_min_val, last_bin_max_val, num_bins), histogram_first); return histogram_first + num_bins; @@ -64,9 +68,11 @@ histogram(_ExecutionPolicy&& exec, _RandomAccessIterator1 first, _RandomAccessIt _RandomAccessIterator2 boundary_first, _RandomAccessIterator2 boundary_last, _RandomAccessIterator3 histogram_first) { + const auto __dispatch_tag = oneapi::dpl::__internal::__select_backend(exec, first, boundary_first, histogram_first); + ::std::ptrdiff_t num_bins = boundary_last - boundary_first - 1; oneapi::dpl::__internal::__pattern_histogram( - ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, + __dispatch_tag, ::std::forward<_ExecutionPolicy>(exec), first, last, num_bins, oneapi::dpl::__internal::__custom_boundary_binhash{boundary_first, boundary_last}, histogram_first); return histogram_first + num_bins; } diff --git a/include/oneapi/dpl/pstl/iterator_defs.h b/include/oneapi/dpl/pstl/iterator_defs.h index ad778064493..1b85d74fc40 100644 --- a/include/oneapi/dpl/pstl/iterator_defs.h +++ b/include/oneapi/dpl/pstl/iterator_defs.h @@ -27,61 +27,33 @@ namespace dpl namespace __internal { -// Internal wrapper around ::std::iterator_traits as it is required to be -// SFINAE-friendly(not produce "hard" error when _Ip is not an iterator) -// only starting with C++17. Although many standard library implementations -// provide it for older versions, we cannot rely on that. -template -struct __iterator_traits -{ -}; - -template -struct __iterator_traits<_Ip, - ::std::void_t> - : ::std::iterator_traits<_Ip> -{ -}; - -// Handles _Tp* and const _Tp* specializations -template -struct __iterator_traits<_Tp*, void> : ::std::iterator_traits<_Tp*> -{ -}; - -// Make is_random_access_iterator not to fail with a 'hard' error when it's used in SFINAE with -// a non-iterator type by providing a default value. -template -struct __is_random_access_iterator_impl : ::std::false_type -{ -}; +// Make is_random_access_iterator and is_forward_iterator not to fail with a 'hard' error when it's used in +// SFINAE with a non-iterator type by providing a default value. +template +auto +__is_iterator_of(int) -> decltype( + ::std::conjunction<::std::is_base_of< + _IteratorTag, typename ::std::iterator_traits<::std::decay_t<_IteratorTypes>>::iterator_category>...>{}); -template -struct __is_random_access_iterator_impl<_IteratorType, - ::std::void_t::iterator_category>> - : ::std::is_same::iterator_category, ::std::random_access_iterator_tag> -{ -}; +template +auto +__is_iterator_of(...) -> ::std::false_type; -/* iterator */ -template -struct __is_random_access_iterator - : ::std::conditional_t<__is_random_access_iterator_impl<_IteratorType>::value, - __is_random_access_iterator<_OtherIteratorTypes...>, ::std::false_type> +template +struct __is_random_access_iterator : decltype(__is_iterator_of<::std::random_access_iterator_tag, _IteratorTypes...>(0)) { }; -template -struct __is_random_access_iterator<_IteratorType> : __is_random_access_iterator_impl<_IteratorType> +template +struct __is_forward_iterator : decltype(__is_iterator_of<::std::forward_iterator_tag, _IteratorTypes...>(0)) { }; template -using __is_random_access_iterator_t = typename __is_random_access_iterator<_IteratorTypes...>::type; +inline constexpr bool __is_random_access_iterator_v = __is_random_access_iterator<_IteratorTypes...>::value; template -inline constexpr bool __is_random_access_iterator_v = __is_random_access_iterator<_IteratorTypes...>::value; +inline constexpr bool __is_forward_iterator_v = __is_forward_iterator<_IteratorTypes...>::value; } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/numeric_fwd.h b/include/oneapi/dpl/pstl/numeric_fwd.h index c663cd5c2d4..1c835443e78 100644 --- a/include/oneapi/dpl/pstl/numeric_fwd.h +++ b/include/oneapi/dpl/pstl/numeric_fwd.h @@ -25,6 +25,8 @@ namespace dpl { namespace __internal { +template +struct __parallel_tag; //------------------------------------------------------------------------ // transform_reduce (version with two binary functions, according to draft N4659) @@ -41,19 +43,17 @@ _Tp __brick_transform_reduce(_ForwardIterator1, _ForwardIterator1, _ForwardItera _BinaryOperation2, /*__is_vector=*/::std::false_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, - _BinaryOperation1, _BinaryOperation2, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, + _BinaryOperation1, _BinaryOperation2) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, - _Tp, _BinaryOperation1, _BinaryOperation2, _IsVector __is_vector, - /*is_parallel=*/::std::true_type); +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _Tp, _BinaryOperation1, _BinaryOperation2); //------------------------------------------------------------------------ // transform_reduce (version with unary and binary functions) @@ -67,19 +67,29 @@ template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Tp, _BinaryOperation, - _UnaryOperation, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Tp, _BinaryOperation, - _UnaryOperation, _IsVector, - /*is_parallel=*/::std::true_type); +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, + _BinaryOperation1, _BinaryOperation2 __bnary_op2) noexcept; + +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _Tp, _BinaryOperation1, _BinaryOperation2); + +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Tp, _BinaryOperation, + _UnaryOperation) noexcept; + +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _Tp, _BinaryOperation, _UnaryOperation); //------------------------------------------------------------------------ // transform_exclusive_scan @@ -97,36 +107,30 @@ ::std::pair<_OutputIterator, _Tp> __brick_transform_scan(_RandomAccessIterator, _UnaryOperation, _Tp, _BinaryOperation, /*Inclusive*/ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryOperation, _Tp, - _BinaryOperation, _Inclusive, _IsVector, - /*is_parallel=*/::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _UnaryOperation, _Tp, _BinaryOperation, _Inclusive, _IsVector, - /*is_parallel=*/::std::true_type); - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, - ::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _UnaryOperation, _Tp, _BinaryOperation, _Inclusive, _IsVector, - /*is_parallel=*/::std::true_type); +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryOperation, + _Tp, _BinaryOperation, _Inclusive) noexcept; + +template +::std::enable_if_t, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive); + +template +::std::enable_if_t<::std::is_floating_point_v<_Tp>, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, + _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive); // transform_scan without initial element -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive, - _IsVector __is_vector, _IsParallel __is_parallel); +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&& __exec, _ForwardIterator, _ForwardIterator, _OutputIterator, + _UnaryOperation, _BinaryOperation, _Inclusive); //------------------------------------------------------------------------ // adjacent_difference @@ -141,17 +145,16 @@ _OutputIterator __brick_adjacent_difference(_RandomAccessIterator, _RandomAccess _BinaryOperation, /*is_vector*/ ::std::true_type) noexcept; -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryOperation, - _IsVector, /*is_parallel*/ ::std::false_type) noexcept; - -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_adjacent_difference(_ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _OutputIterator, - _BinaryOperation, _IsVector, /*is_parallel*/ ::std::true_type); +template +_OutputIterator +__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, + _BinaryOperation) noexcept; + +template +_RandomAccessIterator2 +__pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, + _RandomAccessIterator1, _RandomAccessIterator2, _BinaryOperation); } // namespace __internal } // namespace dpl diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index 8e65580918c..b87a02ba428 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -61,36 +61,40 @@ __brick_transform_reduce(_RandomAccessIterator1 __first1, _RandomAccessIterator1 [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, - _BinaryOperation2 __binary_op2, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept + _BinaryOperation2 __binary_op2) noexcept { - return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, - _BinaryOperation2 __binary_op2, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init, + _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable { return __binary_op2(*__i, *(__first2 + (__i - __first1))); }, __init, __binary_op1, // Combine - [__first1, __first2, __binary_op1, __binary_op2, - __is_vector](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, _Tp __init) -> _Tp { + [__first1, __first2, __binary_op1, __binary_op2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j, + _Tp __init) -> _Tp { return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init, __binary_op1, - __binary_op2, __is_vector); + __binary_op2, _IsVector{}); }); }); } @@ -123,29 +127,33 @@ __brick_transform_reduce(_RandomAccessIterator __first, _RandomAccessIterator __ [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, - _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, - /*is_parallel=*/::std::false_type) noexcept +template +_Tp +__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init, + _BinaryOperation __binary_op, _UnaryOperation __unary_op) noexcept { - return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op, + typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _Tp> -__pattern_transform_reduce(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __unary_op, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_Tp +__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Tp __init, _BinaryOperation __binary_op, + _UnaryOperation __unary_op) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + return __internal::__except_handler([&]() { return __par_backend::__parallel_transform_reduce( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__unary_op](_RandomAccessIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op, - [__unary_op, __binary_op, __is_vector](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) { - return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, __is_vector); + [__unary_op, __binary_op](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) { + return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, _IsVector{}); }); }); } @@ -228,31 +236,34 @@ __brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __la /*is_vector=*/::std::false_type()); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, +template +_OutputIterator +__pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, - _Inclusive, _IsVector __is_vector, /*is_parallel=*/::std::false_type) noexcept + _Inclusive) noexcept { + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(), - __is_vector) + typename _Tag::__is_vector{}) .first; } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional< - _ExecutionPolicy, !::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, - _Inclusive, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +::std::enable_if_t, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation __binary_op, _Inclusive) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; return __internal::__except_handler([&]() { __par_backend::__parallel_transform_scan( - ::std::forward<_ExecutionPolicy>(__exec), __last - __first, + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __last - __first, [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init, __binary_op, [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init) { @@ -261,24 +272,24 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs __unary_op, /*__is_vector*/ ::std::false_type()); }, - [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __j, - _Tp __init) { + [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __j, _Tp __init) { return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op, - __init, __binary_op, _Inclusive(), __is_vector) + __init, __binary_op, _Inclusive(), _IsVector{}) .second; }); return __result + (__last - __first); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy_conditional<_ExecutionPolicy, - ::std::is_floating_point_v<_Tp>, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, - _Inclusive, _IsVector __is_vector, /*is_parallel=*/::std::true_type) +template +::std::enable_if_t<::std::is_floating_point_v<_Tp>, _OutputIterator> +__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, + _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, + _BinaryOperation __binary_op, _Inclusive) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType; _DifferenceType __n = __last - __first; @@ -286,12 +297,13 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs { return __result; } + return __internal::__except_handler([&]() { __par_backend::__parallel_strict_scan( - ::std::forward<_ExecutionPolicy>(__exec), __n, __init, - [__first, __unary_op, __binary_op, __result, __is_vector](_DifferenceType __i, _DifferenceType __len) { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __init, + [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __len) { return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i, - __unary_op, _Tp{}, __binary_op, _Inclusive(), __is_vector) + __unary_op, _Tp{}, __binary_op, _Inclusive(), _IsVector{}) .second; }, __binary_op, @@ -309,20 +321,22 @@ __pattern_transform_scan(_ExecutionPolicy&& __exec, _RandomAccessIterator __firs } // transform_scan without initial element -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_transform_scan(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator __result, _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive, - _IsVector __is_vector, _IsParallel __is_parallel) +template +_OutputIterator +__pattern_transform_scan(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __result, _UnaryOperation __unary_op, _BinaryOperation __binary_op, _Inclusive) { + static_assert(__is_host_dispatch_tag_v<_Tag>); + typedef typename ::std::iterator_traits<_ForwardIterator>::value_type _ValueType; if (__first != __last) { _ValueType __tmp = __unary_op(*__first); *__result = __tmp; - return __pattern_transform_scan(::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, ++__result, - __unary_op, __tmp, __binary_op, _Inclusive(), __is_vector, __is_parallel); + + return __pattern_transform_scan(__tag, ::std::forward<_ExecutionPolicy>(__exec), ++__first, __last, ++__result, + __unary_op, __tmp, __binary_op, _Inclusive()); } else { @@ -360,36 +374,37 @@ __brick_adjacent_difference(_RandomAccessIterator1 __first, _RandomAccessIterato [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); }); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _OutputIterator> -__pattern_adjacent_difference(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, - _OutputIterator __d_first, _BinaryOperation __op, _IsVector __is_vector, - /*is_parallel*/ ::std::false_type) noexcept +template +_OutputIterator +__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, + _OutputIterator __d_first, _BinaryOperation __op) noexcept { - return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, __is_vector); + static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); + + return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, typename _Tag::__is_vector{}); } -template -oneapi::dpl::__internal::__enable_if_host_execution_policy<_ExecutionPolicy, _RandomAccessIterator2> -__pattern_adjacent_difference(_ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, _RandomAccessIterator1 __last, - _RandomAccessIterator2 __d_first, _BinaryOperation __op, _IsVector __is_vector, - /*is_parallel=*/::std::true_type) +template +_RandomAccessIterator2 +__pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first, _BinaryOperation __op) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + assert(__first != __last); typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; *__d_first = *__first; __par_backend::__parallel_for( - ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, - [&__op, __is_vector, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, + [&__op, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { _RandomAccessIterator2 __d_b = __d_first + (__b - __first); __internal::__brick_walk3( __b, __e, __b + 1, __d_b + 1, [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, - __is_vector); + _IsVector{}); }); return __d_first + (__last - __first); } diff --git a/include/oneapi/dpl/pstl/omp/parallel_for.h b/include/oneapi/dpl/pstl/omp/parallel_for.h index 5b6ed66453a..1a0ea24d798 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_for.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for.h @@ -49,7 +49,7 @@ __parallel_for_body(_Index __first, _Index __last, _Fp __f) template void -__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) { if (omp_in_parallel()) { diff --git a/include/oneapi/dpl/pstl/omp/parallel_for_each.h b/include/oneapi/dpl/pstl/omp/parallel_for_each.h index 7877ef095ef..32410cbe927 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_for_each.h +++ b/include/oneapi/dpl/pstl/omp/parallel_for_each.h @@ -44,7 +44,8 @@ __parallel_for_each_body(_ForwardIterator __first, _ForwardIterator __last, _Fp template void -__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Fp __f) +__parallel_for_each(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _ForwardIterator __first, + _ForwardIterator __last, _Fp __f) { if (omp_in_parallel()) { diff --git a/include/oneapi/dpl/pstl/omp/parallel_invoke.h b/include/oneapi/dpl/pstl/omp/parallel_invoke.h index 32491ab9dfd..3503096add5 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_invoke.h +++ b/include/oneapi/dpl/pstl/omp/parallel_invoke.h @@ -38,7 +38,7 @@ __parallel_invoke_body(_F1&& __f1, _F2&& __f2) template void -__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +__parallel_invoke(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) { if (omp_in_parallel()) { diff --git a/include/oneapi/dpl/pstl/omp/parallel_merge.h b/include/oneapi/dpl/pstl/omp/parallel_merge.h index 911d4b2643b..162ef097801 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_merge.h +++ b/include/oneapi/dpl/pstl/omp/parallel_merge.h @@ -71,10 +71,9 @@ __parallel_merge_body(std::size_t __size_x, std::size_t __size_y, _RandomAccessI template void -__parallel_merge(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, - _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, - _LeafMerge __leaf_merge) - +__parallel_merge(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) { std::size_t __size_x = __xe - __xs; std::size_t __size_y = __ye - __ys; diff --git a/include/oneapi/dpl/pstl/omp/parallel_reduce.h b/include/oneapi/dpl/pstl/omp/parallel_reduce.h index beefe09b738..4fc62cdf3d8 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_reduce.h +++ b/include/oneapi/dpl/pstl/omp/parallel_reduce.h @@ -52,8 +52,8 @@ __parallel_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __la template _Value -__parallel_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity, - _RealBody __real_body, _Reduction __reduction) +__parallel_reduce(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Value __identity, _RealBody __real_body, _Reduction __reduction) { // We don't create a nested parallel region in an existing parallel region: // just create tasks. diff --git a/include/oneapi/dpl/pstl/omp/parallel_scan.h b/include/oneapi/dpl/pstl/omp/parallel_scan.h index 29c6c77be54..c3bc022cb2e 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_scan.h +++ b/include/oneapi/dpl/pstl/omp/parallel_scan.h @@ -82,13 +82,14 @@ __downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsi template void -__parallel_strict_scan_body(_Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) +__parallel_strict_scan_body(_ExecutionPolicy&& __exec, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, + _Sp __scan, _Ap __apex) { _Index __p = omp_get_num_threads(); const _Index __slack = 4; _Index __tilesize = (__n - 1) / (__slack * __p) + 1; _Index __m = (__n - 1) / __tilesize; - __buffer<_ExecutionPolicy, _Tp> __buf(__m + 1); + __buffer<_ExecutionPolicy, _Tp> __buf(::std::forward<_ExecutionPolicy>(__exec), __m + 1); _Tp* __r = __buf.get(); oneapi::dpl::__omp_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, @@ -108,8 +109,8 @@ __parallel_strict_scan_body(_Index __n, _Tp __initial, _Rp __reduce, _Cp __combi template void -__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, - _Ap __apex) +__parallel_strict_scan(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& __exec, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) { if (__n <= __default_chunk_size) { @@ -128,16 +129,16 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu if (omp_in_parallel()) { - oneapi::dpl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine, - __scan, __apex); + oneapi::dpl::__omp_backend::__parallel_strict_scan_body(::std::forward<_ExecutionPolicy>(__exec), __n, + __initial, __reduce, __combine, __scan, __apex); } else { _PSTL_PRAGMA(omp parallel) _PSTL_PRAGMA(omp single nowait) { - oneapi::dpl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, - __combine, __scan, __apex); + oneapi::dpl::__omp_backend::__parallel_strict_scan_body(::std::forward<_ExecutionPolicy>(__exec), __n, + __initial, __reduce, __combine, __scan, __apex); } } } diff --git a/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h b/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h index 14aa7b7bf04..4633a3fcade 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h +++ b/include/oneapi/dpl/pstl/omp/parallel_stable_sort.h @@ -123,8 +123,9 @@ __parallel_stable_sort_body(_RandomAccessIterator __xs, _RandomAccessIterator __ template void -__parallel_stable_sort(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator __xs, _RandomAccessIterator __xe, - _Compare __comp, _LeafSort __leaf_sort, std::size_t __nsort = 0) +__parallel_stable_sort(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&& /*__exec*/, + _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, + std::size_t __nsort = 0) { auto __count = static_cast(__xe - __xs); if (__count <= __default_chunk_size || __nsort < __count) diff --git a/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h b/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h index d94e5fd36e9..2c6cf06577b 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h +++ b/include/oneapi/dpl/pstl/omp/parallel_transform_reduce.h @@ -86,8 +86,9 @@ __transform_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __l template _Value -__parallel_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, - _UnaryOp __unary_op, _Value __init, _Combiner __combiner, _Reduction __reduction) +__parallel_transform_reduce(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, + _RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryOp __unary_op, + _Value __init, _Combiner __combiner, _Reduction __reduction) { _Value __result = __init; if (omp_in_parallel()) diff --git a/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h b/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h index 98262635d1e..35c28b4330c 100644 --- a/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h +++ b/include/oneapi/dpl/pstl/omp/parallel_transform_scan.h @@ -27,8 +27,8 @@ namespace __omp_backend template _Tp -__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up /* __u */, _Tp __init, _Cp /* __combine */, - _Rp /* __brick_reduce */, _Sp __scan) +__parallel_transform_scan(oneapi::dpl::__internal::__omp_backend_tag, _ExecutionPolicy&&, _Index __n, _Up /* __u */, + _Tp __init, _Cp /* __combine */, _Rp /* __brick_reduce */, _Sp __scan) { // TODO: parallelize this function. return __scan(_Index(0), __n, __init); diff --git a/include/oneapi/dpl/pstl/omp/util.h b/include/oneapi/dpl/pstl/omp/util.h index bcbfecc23e4..e7c4e3cbc40 100644 --- a/include/oneapi/dpl/pstl/omp/util.h +++ b/include/oneapi/dpl/pstl/omp/util.h @@ -48,7 +48,7 @@ namespace __omp_backend // use to cancel execution //------------------------------------------------------------------------ inline void -__cancel_execution() +__cancel_execution(oneapi::dpl::__internal::__omp_backend_tag) { // TODO: Figure out how to make cancellation work. } @@ -68,9 +68,10 @@ class __buffer_impl operator=(const __buffer_impl&) = delete; public: - static_assert(::std::is_same_v<_ExecutionPolicy, ::std::decay_t<_ExecutionPolicy>>); - - __buffer_impl(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {} + __buffer_impl(_ExecutionPolicy /*__exec*/, std::size_t __n) + : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) + { + } operator bool() const { return __ptr_ != nullptr; } diff --git a/include/oneapi/dpl/pstl/parallel_backend.h b/include/oneapi/dpl/pstl/parallel_backend.h index 1e78d1f635b..b243e8fb492 100644 --- a/include/oneapi/dpl/pstl/parallel_backend.h +++ b/include/oneapi/dpl/pstl/parallel_backend.h @@ -18,14 +18,14 @@ // Select a parallel backend #if ONEDPL_USE_TBB_BACKEND || (!defined(ONEDPL_USE_TBB_BACKEND) && !ONEDPL_USE_OPENMP_BACKEND && _ONEDPL_TBB_AVAILABLE) -# include "parallel_backend_tbb.h" # define _ONEDPL_PAR_BACKEND_TBB 1 +# include "parallel_backend_tbb.h" #elif ONEDPL_USE_OPENMP_BACKEND || (!defined(ONEDPL_USE_OPENMP_BACKEND) && _ONEDPL_OPENMP_AVAILABLE) -# include "parallel_backend_omp.h" # define _ONEDPL_PAR_BACKEND_OPENMP 1 +# include "parallel_backend_omp.h" #else -# include "parallel_backend_serial.h" # define _ONEDPL_PAR_BACKEND_SERIAL 1 +# include "parallel_backend_serial.h" #endif #if _ONEDPL_BACKEND_SYCL diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index a2dd6468a34..edd6652d359 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -43,9 +43,10 @@ class __buffer_impl operator=(const __buffer_impl&) = delete; public: - static_assert(::std::is_same_v<_ExecutionPolicy, ::std::decay_t<_ExecutionPolicy>>); - - __buffer_impl(::std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {} + __buffer_impl(_ExecutionPolicy /*__exec*/, ::std::size_t __n) + : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) + { + } operator bool() const { return __ptr_ != nullptr; } _Tp* @@ -60,21 +61,22 @@ template using __buffer = __buffer_impl<::std::decay_t<_ExecutionPolicy>, _Tp>; inline void -__cancel_execution() +__cancel_execution(oneapi::dpl::__internal::__serial_backend_tag) { } template void -__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + _Fp __f) { __f(__first, __last); } template _Value -__parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Value& __identity, - const _RealBody& __real_body, const _Reduction&) +__parallel_reduce(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + const _Value& __identity, const _RealBody& __real_body, const _Reduction&) { if (__first == __last) { @@ -88,16 +90,16 @@ __parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Valu template _Tp -__parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _UnaryOp, _Tp __init, _BinaryOp, - _Reduce __reduce) +__parallel_transform_reduce(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, + _Index __last, _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce) { return __reduce(__first, __last, __init); } template void -__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, - _Ap __apex) +__parallel_strict_scan(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) { _Tp __sum = __initial; if (__n) @@ -109,15 +111,16 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu template _Tp -__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _UnaryOp, _Tp __init, _BinaryOp, _Reduce, _Scan __scan) +__parallel_transform_scan(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _UnaryOp, + _Tp __init, _BinaryOp, _Reduce, _Scan __scan) { return __scan(_Index(0), __n, __init); } template void -__parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, - _LeafSort __leaf_sort, ::std::size_t = 0) +__parallel_stable_sort(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first, + _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort, ::std::size_t = 0) { __leaf_sort(__first, __last, __comp); } @@ -125,16 +128,16 @@ __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __first, _Rando template void -__parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, - _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _RandomAccessIterator3 __outit, - _Compare __comp, _LeafMerge __leaf_merge) +__parallel_merge(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __first1, + _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, + _RandomAccessIterator3 __outit, _Compare __comp, _LeafMerge __leaf_merge) { __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp); } template void -__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +__parallel_invoke(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) { ::std::forward<_F1>(__f1)(); ::std::forward<_F2>(__f2)(); @@ -142,7 +145,8 @@ __parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) template void -__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) +__parallel_for_each(oneapi::dpl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, + _ForwardIterator __end, _Fp __f) { for (auto __iter = __begin; __iter != __end; ++__iter) __f(*__iter); diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 556e305e1c7..2ddfa61007a 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -23,6 +23,7 @@ #include #include "parallel_backend_utils.h" +#include "execution_impl.h" // Bring in minimal required subset of Intel(R) Threading Building Blocks (Intel(R) TBB) #include @@ -64,10 +65,11 @@ class __buffer_impl operator=(const __buffer_impl&) = delete; public: - static_assert(::std::is_same_v<_ExecutionPolicy, ::std::decay_t<_ExecutionPolicy>>); - //! Try to obtain buffer of given size to store objects of _Tp type - __buffer_impl(const ::std::size_t __n) : _M_allocator(), _M_ptr(_M_allocator.allocate(__n)), _M_buf_size(__n) {} + __buffer_impl(_ExecutionPolicy /*__exec*/, const ::std::size_t __n) + : _M_allocator(), _M_ptr(_M_allocator.allocate(__n)), _M_buf_size(__n) + { + } //! True if buffer was successfully obtained, zero otherwise. operator bool() const { return _M_ptr != nullptr; } //! Return pointer to buffer, or nullptr if buffer could not be obtained. @@ -85,7 +87,7 @@ using __buffer = __buffer_impl<::std::decay_t<_ExecutionPolicy>, _Tp>; // Wrapper for tbb::task inline void -__cancel_execution() +__cancel_execution(oneapi::dpl::__internal::__tbb_backend_tag) { #if TBB_INTERFACE_VERSION <= 12000 tbb::task::self().group()->cancel_group_execution(); @@ -118,7 +120,7 @@ class __parallel_for_body // wrapper over tbb::parallel_for template void -__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) +__parallel_for(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) { tbb::this_task_arena::isolate([=]() { tbb::parallel_for(tbb::blocked_range<_Index>(__first, __last), __parallel_for_body<_Index, _Fp>(__f)); @@ -129,8 +131,8 @@ __parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f) // wrapper over tbb::parallel_reduce template _Value -__parallel_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, const _Value& __identity, - const _RealBody& __real_body, const _Reduction& __reduction) +__parallel_reduce(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, + const _Value& __identity, const _RealBody& __real_body, const _Reduction& __reduction) { return tbb::this_task_arena::isolate([__first, __last, &__identity, &__real_body, &__reduction]() -> _Value { return tbb::parallel_reduce( @@ -210,8 +212,8 @@ struct __par_trans_red_body template _Tp -__parallel_transform_reduce(_ExecutionPolicy&&, _Index __first, _Index __last, _Up __u, _Tp __init, _Cp __combine, - _Rp __brick_reduce) +__parallel_transform_reduce(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, + _Index __last, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce) { __tbb_backend::__par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> __body(__u, __init, __combine, __brick_reduce); // The grain size of 3 is used in order to provide minimum 2 elements for each body @@ -379,8 +381,8 @@ __downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsi // T must have a trivial constructor and destructor. template void -__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, - _Ap __apex) +__parallel_strict_scan(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&& __exec, _Index __n, _Tp __initial, + _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex) { tbb::this_task_arena::isolate([=, &__combine]() { if (__n > 1) @@ -389,7 +391,7 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu const _Index __slack = 4; _Index __tilesize = (__n - 1) / (__slack * __p) + 1; _Index __m = (__n - 1) / __tilesize; - __tbb_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__m + 1); + __tbb_backend::__buffer<_ExecutionPolicy, _Tp> __buf(__exec, __m + 1); _Tp* __r = __buf.get(); __tbb_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, __combine); @@ -419,8 +421,8 @@ __parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __redu template _Tp -__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce, - _Sp __scan) +__parallel_transform_scan(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __n, _Up __u, + _Tp __init, _Cp __combine, _Rp __brick_reduce, _Sp __scan) { __trans_scan_body<_Index, _Up, _Tp, _Cp, _Rp, _Sp> __body(__u, __init, __combine, __brick_reduce, __scan); auto __range = tbb::blocked_range<_Index>(0, __n); @@ -1182,8 +1184,9 @@ __stable_sort_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _Le template void -__parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, - _LeafSort __leaf_sort, ::std::size_t __nsort) +__parallel_stable_sort(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&& __exec, + _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, + ::std::size_t __nsort) { tbb::this_task_arena::isolate([=, &__nsort]() { //sorting based on task tree and parallel merge @@ -1194,7 +1197,7 @@ __parallel_stable_sort(_ExecutionPolicy&&, _RandomAccessIterator __xs, _RandomAc const _DifferenceType __sort_cut_off = _ONEDPL_STABLE_SORT_CUT_OFF; if (__n > __sort_cut_off) { - __tbb_backend::__buffer<_ExecutionPolicy, _ValueType> __buf(__n); + __tbb_backend::__buffer<_ExecutionPolicy, _ValueType> __buf(__exec, __n); __root_task<__stable_sort_func<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>> __root{ __xs, __xe, __buf.get(), true, __comp, __leaf_sort, __nsort, __xs, __buf.get()}; __task::spawn_root_and_wait(__root); @@ -1274,9 +1277,9 @@ operator()(__task* __self) template void -__parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, - _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, - _LeafMerge __leaf_merge) +__parallel_merge(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __xs, + _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, + _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge) { typedef typename ::std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2; @@ -1303,9 +1306,10 @@ __parallel_merge(_ExecutionPolicy&&, _RandomAccessIterator1 __xs, _RandomAccessI //------------------------------------------------------------------------ // parallel_invoke //------------------------------------------------------------------------ + template void -__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) +__parallel_invoke(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) { //TODO: a version of tbb::this_task_arena::isolate with variadic arguments pack should be added in the future tbb::this_task_arena::isolate( @@ -1315,9 +1319,11 @@ __parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2) //------------------------------------------------------------------------ // parallel_for_each //------------------------------------------------------------------------ + template void -__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __begin, _ForwardIterator __end, _Fp __f) +__parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _ForwardIterator __begin, + _ForwardIterator __end, _Fp __f) { tbb::this_task_arena::isolate([&]() { tbb::parallel_for_each(__begin, __end, __f); }); } diff --git a/include/oneapi/dpl/pstl/parallel_impl.h b/include/oneapi/dpl/pstl/parallel_impl.h index a2d7d20e562..66d9d8d1741 100644 --- a/include/oneapi/dpl/pstl/parallel_impl.h +++ b/include/oneapi/dpl/pstl/parallel_impl.h @@ -32,10 +32,13 @@ namespace __internal //----------------------------------------------------------------------- /** Return extremum value returned by brick f[i,j) for subranges [i,j) of [first,last) Each f[i,j) must return a value in [i,j). */ -template +template _Index -__parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, _IsFirst) +__parallel_find(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f, + _IsFirst) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + typedef typename ::std::iterator_traits<_Index>::difference_type _DifferenceType; const _DifferenceType __n = __last - __first; _DifferenceType __initial_dist = _IsFirst::value ? __n : -1; @@ -44,7 +47,7 @@ __parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick ::std::atomic<_DifferenceType> __extremum(__initial_dist); // TODO: find out what is better here: parallel_for or parallel_reduce - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__comp, __f, __first, &__extremum](_Index __i, _Index __j) { // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of // why using a shared variable scales fairly well in this situation. @@ -70,17 +73,19 @@ __parallel_find(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick // parallel_or //------------------------------------------------------------------------ //! Return true if brick f[i,j) returns true for some subrange [i,j) of [first,last) -template +template bool -__parallel_or(_ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) +__parallel_or(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f) { + using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; + ::std::atomic __found(false); - __par_backend::__parallel_for(::std::forward<_ExecutionPolicy>(__exec), __first, __last, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__f, &__found](_Index __i, _Index __j) { if (!__found.load(::std::memory_order_relaxed) && __f(__i, __j)) { __found.store(true, ::std::memory_order_relaxed); - __par_backend::__cancel_execution(); + __par_backend::__cancel_execution(__backend_tag{}); } }); return __found; diff --git a/test/parallel_api/experimental/for_loop.pass.cpp b/test/parallel_api/experimental/for_loop.pass.cpp index 8c59ffe1afd..725d2bc9747 100644 --- a/test/parallel_api/experimental/for_loop.pass.cpp +++ b/test/parallel_api/experimental/for_loop.pass.cpp @@ -296,8 +296,9 @@ test_for_loop() Sequence in_out(n, Gen()); Sequence expected = in_out; - invoke_on_all_policies<>()(test_for_loop_impl(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), - in_out.size()); + // for_loop staff is implemented for the host policies only + invoke_on_all_host_policies()(test_for_loop_impl(), in_out.begin(), in_out.end(), expected.begin(), + expected.end(), in_out.size()); } } @@ -313,8 +314,9 @@ test_for_loop_strided() ::std::vector strides = {1, 2, 10, n > 1 ? n - 1 : 1, n > 0 ? n : 1, n + 1}; for (size_t stride : strides) { - invoke_on_all_policies<>()(test_for_loop_strided_impl(), in_out.begin(), in_out.end(), expected.begin(), - expected.end(), in_out.size(), stride); + // for_loop staff is implemented for the host policies only + invoke_on_all_host_policies()(test_for_loop_strided_impl(), in_out.begin(), in_out.end(), expected.begin(), + expected.end(), in_out.size(), stride); } } } diff --git a/test/parallel_api/experimental/for_loop_induction.pass.cpp b/test/parallel_api/experimental/for_loop_induction.pass.cpp index 505ceb54e1e..e01569c58a1 100644 --- a/test/parallel_api/experimental/for_loop_induction.pass.cpp +++ b/test/parallel_api/experimental/for_loop_induction.pass.cpp @@ -153,8 +153,10 @@ test() { Sequence in_out(n, [](long int k) { return T(k % 5 != 1 ? 3 * k - 7 : 0); }); Sequence expected = in_out; - invoke_on_all_policies<>()(test_body(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), - in_out.size()); + + // for_loop staff is implemented for the host policies only + invoke_on_all_host_policies()(test_body(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), + in_out.size()); } } diff --git a/test/parallel_api/experimental/for_loop_reduction.pass.cpp b/test/parallel_api/experimental/for_loop_reduction.pass.cpp index cfd918ff2bd..7f6b2b9e7f5 100644 --- a/test/parallel_api/experimental/for_loop_reduction.pass.cpp +++ b/test/parallel_api/experimental/for_loop_reduction.pass.cpp @@ -77,8 +77,10 @@ test() { Sequence in_out(n, [](long int k) { return T(k % 5 != 1 ? 3 * k - 7 : 0); }); Sequence expected = in_out; - invoke_on_all_policies<>()(test_body(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), - in_out.size()); + + // for_loop staff is implemented for the host policies only + invoke_on_all_host_policies()(test_body(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), + in_out.size()); } } @@ -176,10 +178,12 @@ test_predefined(::std::initializer_list init_list) // Just arbitrary numbers Sequence in_out = init_list; Sequence expected = in_out; - invoke_on_all_policies<>()(test_body_predefined(), in_out.begin(), in_out.end(), expected.begin(), expected.end(), - in_out.size()); - invoke_on_all_policies<>()(test_body_predefined_bits(), in_out.begin(), in_out.end(), expected.begin(), - expected.end(), in_out.size()); + + // for_loop staff is implemented for the host policies only + invoke_on_all_host_policies()(test_body_predefined(), in_out.begin(), in_out.end(), expected.begin(), + expected.end(), in_out.size()); + invoke_on_all_host_policies()(test_body_predefined_bits(), in_out.begin(), in_out.end(), expected.begin(), + expected.end(), in_out.size()); } void From cd565891f4ffdd0b4641810a38c60c683e5f1fe0 Mon Sep 17 00:00:00 2001 From: "Mateusz P. Nowak" Date: Thu, 21 Mar 2024 12:57:11 +0000 Subject: [PATCH 07/29] shp namespace moved to experimental --- .../distributed_ranges_impl/concepts/concepts.hpp | 5 +++-- .../distributed_ranges_impl/detail/enumerate.hpp | 4 ++-- .../internal/distributed_ranges_impl/detail/index.hpp | 4 ++-- .../detail/iterator_adaptor.hpp | 4 ++-- .../distributed_ranges_impl/detail/logger.hpp | 4 ++-- .../detail/onedpl_direct_iterator.hpp | 4 ++-- .../distributed_ranges_impl/detail/owning_view.hpp | 4 ++-- .../distributed_ranges_impl/detail/ranges.hpp | 4 ++-- .../detail/remote_subrange.hpp | 4 ++-- .../distributed_ranges_impl/detail/segments_tools.hpp | 4 ++-- .../distributed_ranges_impl/detail/sycl_utils.hpp | 8 ++++---- .../internal/distributed_ranges_impl/detail/utils.hpp | 4 ++-- .../distributed_ranges_impl/detail/view_detectors.hpp | 4 ++-- .../distributed_ranges_impl/shp/algorithms/copy.hpp | 4 ++-- .../shp/algorithms/exclusive_scan.hpp | 4 ++-- .../shp/algorithms/execution_policy.hpp | 4 ++-- .../distributed_ranges_impl/shp/algorithms/fill.hpp | 4 ++-- .../shp/algorithms/for_each.hpp | 4 ++-- .../shp/algorithms/inclusive_scan.hpp | 4 ++-- .../distributed_ranges_impl/shp/algorithms/iota.hpp | 4 ++-- .../shp/algorithms/matrix/gemm.hpp | 4 ++-- .../shp/algorithms/matrix/gemv.hpp | 4 ++-- .../shp/algorithms/matrix/local_gemm.hpp | 4 ++-- .../shp/algorithms/matrix/local_gemv.hpp | 4 ++-- .../distributed_ranges_impl/shp/algorithms/reduce.hpp | 4 ++-- .../distributed_ranges_impl/shp/algorithms/sort.hpp | 4 ++-- .../shp/algorithms/transform.hpp | 4 ++-- .../distributed_ranges_impl/shp/allocators.hpp | 4 ++-- .../distributed_ranges_impl/shp/containers/detail.hpp | 4 ++-- .../shp/containers/distributed_dense_matrix.hpp | 4 ++-- .../shp/containers/duplicated_vector.hpp | 4 ++-- .../shp/containers/matrix_entry.hpp | 8 ++++---- .../shp/containers/matrix_partition.hpp | 6 +++--- .../shp/containers/sequential/dense_matrix.hpp | 4 ++-- .../shp/containers/sparse_matrix.hpp | 4 ++-- .../internal/distributed_ranges_impl/shp/detail.hpp | 4 ++-- .../distributed_ranges_impl/shp/device_ptr.hpp | 4 ++-- .../distributed_ranges_impl/shp/device_ref.hpp | 4 ++-- .../distributed_ranges_impl/shp/device_span.hpp | 4 ++-- .../distributed_ranges_impl/shp/device_vector.hpp | 4 ++-- .../distributed_ranges_impl/shp/distributed_span.hpp | 4 ++-- .../shp/distributed_vector.hpp | 4 ++-- .../internal/distributed_ranges_impl/shp/future.hpp | 4 ++-- .../dpl/internal/distributed_ranges_impl/shp/init.hpp | 4 ++-- .../internal/distributed_ranges_impl/shp/range.hpp | 4 ++-- .../distributed_ranges_impl/shp/range_adaptors.hpp | 4 ++-- .../dpl/internal/distributed_ranges_impl/shp/span.hpp | 4 ++-- .../dpl/internal/distributed_ranges_impl/shp/util.hpp | 4 ++-- .../distributed_ranges_impl/shp/util/coo_matrix.hpp | 4 ++-- .../shp/util/generate_random.hpp | 4 ++-- .../distributed_ranges_impl/shp/util/matrix_io.hpp | 4 ++-- .../internal/distributed_ranges_impl/shp/vector.hpp | 4 ++-- .../shp/views/csr_matrix_view.hpp | 4 ++-- .../shp/views/dense_column_view.hpp | 4 ++-- .../shp/views/dense_matrix_iterator.hpp | 4 ++-- .../shp/views/dense_matrix_view.hpp | 4 ++-- .../shp/views/dense_row_view.hpp | 4 ++-- .../distributed_ranges_impl/shp/views/enumerate.hpp | 4 ++-- .../shp/views/standard_views.hpp | 4 ++-- .../distributed_ranges_impl/shp/views/views.hpp | 4 ++-- .../internal/distributed_ranges_impl/shp/zip_view.hpp | 8 ++++---- .../internal/distributed_ranges_impl/views/iota.hpp | 4 ++-- .../distributed_ranges_impl/views/transform.hpp | 4 ++-- .../internal/distributed_ranges_impl/views/views.hpp | 4 ++-- test/distributed-ranges/shp/CMakeLists.txt | 11 ++++------- 65 files changed, 140 insertions(+), 142 deletions(-) diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp index 0894f710336..0f187440537 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp @@ -6,7 +6,8 @@ #include -namespace dr { +namespace experimental +{ template concept remote_iterator = @@ -56,4 +57,4 @@ concept distributed_contiguous_iterator = remote_contiguous_range()))>>; -} // namespace dr +} // namespace experimental diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp index a7ac841052c..5c558cb9326 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp @@ -6,7 +6,7 @@ #include -namespace dr { +namespace experimental { namespace __detail { @@ -59,4 +59,4 @@ inline constexpr auto enumerate = enumerate_fn_{}; } // namespace __detail -} // namespace dr +} // namespace experimental diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp index a317e97bcf9..f7d699c12a9 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr { +namespace experimental { namespace { template @@ -96,7 +96,7 @@ template class index { index_type second; }; -} // namespace dr +} // namespace experimental namespace std { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp index bc6a63b8db9..6ab6339576c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp @@ -9,7 +9,7 @@ #include -namespace dr { +namespace experimental { namespace { @@ -197,4 +197,4 @@ template class iterator_adaptor { accessor_type accessor_; }; -} // namespace dr +} // namespace experimental diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp index 50642205c57..dd911ef18a6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp @@ -13,7 +13,7 @@ #include "format_shim.hpp" #include "ranges_shim.hpp" -namespace dr { +namespace experimental { class timer { public: @@ -111,4 +111,4 @@ inline logger drlog; #define DRLOG(...) \ dr::drlog.debug(nostd::source_location::current(), __VA_ARGS__) -} // namespace dr +} // namespace experimental diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp index 86c572ef200..b8a5ec9adf4 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp @@ -8,7 +8,7 @@ #include -namespace dr { +namespace experimental { namespace __detail { @@ -108,4 +108,4 @@ template class direct_iterator { } // namespace __detail -} // namespace dr +} // namespace experimental diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp index d6027c6823b..1d1d7128f53 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp @@ -6,7 +6,7 @@ #include -namespace dr { +namespace experimental { namespace __detail { @@ -66,4 +66,4 @@ class owning_view : public rng::view_interface> { } // namespace __detail -} // namespace dr +} // namespace experimental diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp index 11fa8d5c524..e9c50193709 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp @@ -10,7 +10,7 @@ #include -namespace dr { +namespace experimental { namespace ranges { @@ -259,4 +259,4 @@ inline constexpr auto local = local_fn_{}; } // namespace ranges -} // namespace dr +} // namespace experimental diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp index 50147347693..75f1c232f8e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr { +namespace experimental { template class remote_subrange : public rng::subrange { @@ -43,7 +43,7 @@ remote_subrange(R &&, std::size_t) -> remote_subrange>; template remote_subrange(R &&) -> remote_subrange>; -} // namespace dr +} // namespace experimental #if !defined(DR_SPEC) diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp index 59fce0dd761..b4218ba27ab 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp @@ -10,7 +10,7 @@ #include #include -namespace dr { +namespace experimental { namespace __detail { @@ -91,7 +91,7 @@ template auto drop_segments(R &&segments, std::size_t n) { } // namespace __detail -} // namespace dr +} // namespace experimental namespace DR_RANGES_NAMESPACE { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp index 143f3e8612e..300f9a7569a 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp @@ -12,7 +12,7 @@ #include -namespace dr::__detail { +namespace experimental::__detail { // With the ND-range workaround, the maximum kernel size is // `std::numeric_limits::max()` rounded down to @@ -191,17 +191,17 @@ sycl::event parallel_for(sycl::queue &q, sycl::range<3> global, Fn &&fn) { using event = sycl::event; -} // namespace dr::__detail +} // namespace experimental::__detail #else -namespace dr::__detail { +namespace experimental::__detail { class event { public: void wait() {} }; -} // namespace dr::__detail +} // namespace experimental::__detail #endif // SYCL_LANGUAGE_VERSION diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp index b972153c11d..ebdb37f9394 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp @@ -4,7 +4,7 @@ #pragma once -namespace dr::__detail { +namespace experimental::__detail { inline std::size_t round_up(std::size_t n, std::size_t multiple) { if (multiple == 0) { @@ -27,4 +27,4 @@ inline std::size_t partition_up(std::size_t n, std::size_t multiple) { return round_up(n, multiple) / multiple; } -} // namespace dr::__detail +} // namespace experimental::__detail diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp index edd50c285cc..1ddeb9fcb00 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp @@ -6,7 +6,7 @@ #include -namespace dr { +namespace experimental { template struct is_ref_view : std::false_type {}; template @@ -56,4 +56,4 @@ struct is_zip_view> : std::true_type {}; template inline constexpr bool is_zip_view_v = is_zip_view::value; -} // namespace dr +} // namespace experimental diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp index ebdc6425da3..4eb78340bd6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { // Copy between contiguous ranges template @@ -225,4 +225,4 @@ O copy(R &&r, O result) { return copy(rng::begin(r), rng::end(r), result); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp index 0ae647b29c8..3bb69fa7544 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp @@ -20,7 +20,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template @@ -222,4 +222,4 @@ void exclusive_scan(Iter first, Iter last, OutputIter d_first, T init) { exclusive_scan(dr::shp::par_unseq, first, last, d_first, init); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp index 8bdfa00f651..3ea808edcbd 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { struct device_policy { device_policy(sycl::device device) : devices_({device}) {} @@ -31,4 +31,4 @@ struct device_policy { std::vector devices_; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp index 26226c63f21..6fac99ffb86 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template requires(!std::is_const_v> && @@ -91,4 +91,4 @@ auto fill(Iter first, Iter last, const T &value) { return last; } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp index 0266bdea344..8455b68b7bb 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp @@ -13,7 +13,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template void for_each(ExecutionPolicy &&policy, R &&r, Fn &&fn) { @@ -54,4 +54,4 @@ void for_each(Iter begin, Iter end, Fn &&fn) { for_each(dr::shp::par_unseq, begin, end, std::forward(fn)); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp index 15fbd9467dc..32b757af0c1 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp @@ -22,7 +22,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template (binary_op), init); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp index bfc250abfe6..1cd8793107f 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp @@ -11,7 +11,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template void iota(R &&r, T value) { auto iota_view = rng::views::iota(value, T(value + rng::distance(r))); @@ -28,4 +28,4 @@ void iota(Iter begin, Iter end, T value) { iota(r, value); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp index e860a5ed5a9..a6f8eef3b98 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template void gemm(distributed_dense_matrix &a, distributed_dense_matrix &b, @@ -242,4 +242,4 @@ void gemm_buffered_async(distributed_dense_matrix &a, } } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp index a4c0842f744..73187f0653a 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp @@ -14,7 +14,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template @@ -205,4 +205,4 @@ void gemv_square_copy(C &&c, dr::shp::sparse_matrix &a, B &&b) { __detail::wait(events); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp index b7cd17dcc11..3220cd93ab6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp @@ -10,7 +10,7 @@ #include #endif -namespace dr::shp { +namespace experimental::shp { namespace __detail { @@ -81,4 +81,4 @@ auto local_gemm(sycl::queue &q, shp::dense_matrix_view a, } // namespace __detail -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp index 142792ecfde..b9a536ebadf 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp @@ -12,7 +12,7 @@ #include #endif -namespace dr::shp { +namespace experimental::shp { namespace __detail { @@ -97,4 +97,4 @@ auto local_gemv(sycl::queue &q, csr_matrix_view a, Iter b, } // namespace __detail -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp index 7bfd00eb178..64aa55ae848 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp @@ -51,7 +51,7 @@ auto reduce_no_init_async(ExecutionPolicy &&policy, Iter first, Iter last, } // namespace -namespace dr::shp { +namespace experimental::shp { template @@ -164,4 +164,4 @@ T reduce(Iter first, Iter last, T init, BinaryOp &&binary_op) { std::forward(binary_op)); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp index b0a595cc62c..2735114f132 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp @@ -14,7 +14,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { namespace __detail { @@ -285,4 +285,4 @@ void sort(RandomIt first, RandomIt last, Compare comp = Compare()) { sort(rng::subrange(first, last), comp); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp index 50eebd0698e..1ee15f1bf11 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { /** * Applies the given function to a range and stores the result in another range, @@ -91,4 +91,4 @@ auto transform(Iter1 in_begin, Iter1 in_end, Iter2 out_end, Fn &&fn) { std::forward(fn)); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp index 10beee77ca9..07c58e0ed58 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp @@ -10,7 +10,7 @@ #include -namespace dr::shp { +namespace experimental::shp { template using shared_allocator = sycl::usm_allocator; @@ -124,4 +124,4 @@ template class buffered_allocator { std::shared_ptr> buffers_; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp index e0cf3175241..f0bcf125336 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp @@ -6,7 +6,7 @@ #include -namespace dr::shp { +namespace experimental::shp { namespace detail { @@ -25,4 +25,4 @@ inline std::tuple factor(std::size_t n) { } // namespace detail -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp index c70f5aff017..b208106bb09 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template class distributed_dense_matrix_accessor { public: @@ -329,4 +329,4 @@ template class distributed_dense_matrix { std::vector>> tiles_; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp index 904458e5777..e53b13f6d64 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template > class duplicated_vector { @@ -45,4 +45,4 @@ class duplicated_vector { std::size_t size_ = 0; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp index df29dda07df..b62ea5e4c46 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp @@ -10,7 +10,7 @@ #include -namespace dr::shp { +namespace experimental::shp { template class matrix_entry { public: @@ -85,7 +85,7 @@ template class matrix_entry { map_type value_; }; -} // namespace dr::shp +} // namespace experimental::shp namespace std { @@ -107,7 +107,7 @@ struct tuple_size> : integral_constant { } // namespace std -namespace dr::shp { +namespace experimental::shp { template class matrix_ref { @@ -184,7 +184,7 @@ class matrix_ref { scalar_reference value_; }; -} // namespace dr::shp +} // namespace experimental::shp namespace std { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp index 5574450ffc5..cc8043ce66e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { namespace tile { @@ -87,7 +87,7 @@ class block_cyclic final : public matrix_partition { dr::index<> tile_shape_; dr::index<> grid_shape_; -}; // namespace dr::shp +}; // namespace experimental::shp inline std::vector partition_matmul(std::size_t m, std::size_t n, std::size_t k) { @@ -112,4 +112,4 @@ inline std::vector partition_matmul(std::size_t m, std::size_t n, return {a_block, b_block, c_block}; } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp index 92b25a5e34e..c9cd48fc251 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp @@ -13,7 +13,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template > class dense_matrix { @@ -140,4 +140,4 @@ class dense_matrix { size_type ld_; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp index 909009139e4..292fd0edfea 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template requires(rng::viewable_range) @@ -410,4 +410,4 @@ template class sparse_matrix { std::vector segments_; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp index b4d76f98358..e3c6d70e673 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp @@ -11,7 +11,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { namespace __detail { @@ -87,4 +87,4 @@ inline void wait(const std::vector &events) { } // namespace __detail -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp index 71dd37a5ae6..ddd7e328b35 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp @@ -10,7 +10,7 @@ #include -namespace dr::shp { +namespace experimental::shp { template requires(std::is_trivially_copyable_v || std::is_void_v) @@ -141,4 +141,4 @@ class device_ptr { T *pointer_; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp index 8b51ca2122e..f93ee74afde 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template requires(std::is_trivially_copyable_v || std::is_void_v) @@ -57,4 +57,4 @@ class device_ref { T *pointer_; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp index 2a779f900c2..84b238d964f 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { // A `device_span` is simply a normal `std::span` that's // been decorated with an extra `rank()` function, showing @@ -94,4 +94,4 @@ template device_span(R &&, std::size_t) -> device_span, rng::iterator_t>; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp index 0d6c97a5dcf..e4e60051cab 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template class device_vector : public dr::shp::vector { @@ -34,4 +34,4 @@ template device_vector(std::size_t, const Alloc, std::size_t) -> device_vector; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp index 265053504b7..824c82cdaee 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp @@ -12,7 +12,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template class distributed_span_accessor { public: @@ -254,4 +254,4 @@ distributed_span(R &&r) -> distributed_span< rng::range_value_t, rng::iterator_t>>; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp index a459a8df072..8a1ced9d36d 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp @@ -14,7 +14,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template class distributed_vector_accessor { public: @@ -214,4 +214,4 @@ struct distributed_vector { std::size_t segment_size_ = 0; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp index 185dcff1016..9ae45f93631 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp @@ -9,7 +9,7 @@ #include -namespace dr::shp { +namespace experimental::shp { template class future { public: @@ -45,4 +45,4 @@ template class future { std::vector events_; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp index b31eb2b3fa5..67240aa4d49 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { namespace __detail { @@ -103,4 +103,4 @@ inline auto &dpl_policy(std::size_t rank) { } // namespace __detail -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp index 36fb81dd3c5..bb2e1a20efd 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template class id { public: @@ -149,4 +149,4 @@ template auto distributed_iota_view(R &&r) { } */ -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp index 4373e51bb5b..5149705e778 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp @@ -7,11 +7,11 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template auto enumerate(R &&r) { auto i = rng::views::iota(uint32_t(0), uint32_t(rng::size(r))); return dr::shp::zip_view(i, r); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp index e297405d30c..1c8dc65fe40 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp @@ -8,7 +8,7 @@ #include -namespace dr::shp { +namespace experimental::shp { template class span : public rng::view_interface> { @@ -58,4 +58,4 @@ span(R &&) -> span, rng::iterator_t>; template span(Iter first, std::size_t count) -> span, Iter>; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp index 479d3bf5207..dd053649382 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template sycl::device select_device(Selector &&selector) { sycl::device d; @@ -243,4 +243,4 @@ concept sycl_device_selector = requires(T &t, const sycl::device &device) { } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp index f95649a0135..2690520fb9c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { namespace __detail { @@ -167,4 +167,4 @@ class coo_matrix { } // namespace __detail -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp index f5ebc9ae9b5..789675f6d02 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { namespace { @@ -89,4 +89,4 @@ auto generate_random_csr(dr::index shape, double density = 0.01, return csr_matrix_view(values, rowptr, colind, shape, nnz, 0); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp index ff6bf29c357..330562920cb 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { namespace __detail { @@ -286,4 +286,4 @@ auto mmread(std::string file_path, bool one_indexed = true) { one_indexed); } -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp index 7860e1f22ed..b0d66a84b4c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp @@ -6,7 +6,7 @@ #include -namespace dr::shp { +namespace experimental::shp { // TODO: deal properly with non-trivially destructible types // - constructors, destructors, assign @@ -245,4 +245,4 @@ template > class vector { allocator_type allocator_; }; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp index fecf63954ef..ca1b8dcb493 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template class csr_matrix_view_accessor { @@ -222,4 +222,4 @@ csr_matrix_view(TIter, IIter, IIter, Args &&...) -> csr_matrix_view, std::iter_value_t, TIter, IIter>; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp index 71286bd4734..bf5c522016f 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template class dense_matrix_column_accessor { public: using size_type = std::size_t; @@ -108,4 +108,4 @@ template dense_matrix_column_view(Iter, std::size_t, std::size_t, std::size_t) -> dense_matrix_column_view, Iter>; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp index fb2ff89b914..afb5a643131 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp @@ -12,7 +12,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template class dense_matrix_accessor { public: @@ -106,4 +106,4 @@ using dense_matrix_iterator = template using dense_matrix_view_iterator = dense_matrix_iterator; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp index dfe28d46f68..912641e86d6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp @@ -14,7 +14,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template class dense_matrix_view @@ -121,4 +121,4 @@ template dense_matrix_view(dense_matrix &) -> dense_matrix_view::pointer>; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp index 18d8e1d82b0..e15612bdda1 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { template class dense_matrix_row_accessor { public: using size_type = std::size_t; @@ -106,4 +106,4 @@ template dense_matrix_row_view(Iter, std::size_t, std::size_t) -> dense_matrix_row_view, Iter>; -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp index c3455e9585a..6a259586f8b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp @@ -6,7 +6,7 @@ #include -namespace dr::shp { +namespace experimental::shp { namespace views { @@ -53,4 +53,4 @@ inline constexpr auto enumerate = enumerate_fn_{}; } // namespace views -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp index aa5887c50cd..1443900f59b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp @@ -11,7 +11,7 @@ #include #include -namespace dr::shp { +namespace experimental::shp { namespace views { @@ -44,4 +44,4 @@ inline auto slice(dr::index<> slice_indices) { } // namespace views -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp index 3f7e4449266..4284d05dbe7 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr::shp::views { +namespace experimental::shp::views { inline constexpr auto all = rng::views::all; @@ -23,4 +23,4 @@ inline constexpr auto take = rng::views::take; inline constexpr auto transform = dr::views::transform; -} // namespace dr::shp::views +} // namespace experimental::shp::views diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp index 81971f834bc..b0ec7109f06 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp @@ -12,7 +12,7 @@ #include #include -namespace dr { +namespace experimental { template struct is_owning_view : std::false_type {}; // template @@ -21,9 +21,9 @@ template struct is_owning_view : std::false_type {}; template inline constexpr bool is_owning_view_v = is_owning_view{}; -}; // namespace dr +}; // namespace experimental -namespace dr::shp { +namespace experimental::shp { namespace __detail { @@ -346,4 +346,4 @@ template auto zip(Rs &&...rs) { } // namespace views -} // namespace dr::shp +} // namespace experimental::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp index 3301bfa0c1e..2477dd4f659 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp @@ -4,7 +4,7 @@ #pragma once -namespace dr::views { +namespace experimental::views { // // range-v3 iota uses sentinels that are not the same type as the @@ -24,4 +24,4 @@ struct iota_fn_ { inline constexpr auto iota = iota_fn_{}; -} // namespace dr::views +} // namespace experimental::views diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp index cbf35f084e4..dfc47bc64b0 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp @@ -11,7 +11,7 @@ #include #include -namespace dr { +namespace experimental { template class transform_iterator { @@ -195,7 +195,7 @@ class transform_fn_ { inline constexpr auto transform = transform_fn_{}; } // namespace views -} // namespace dr +} // namespace experimental #if !defined(DR_SPEC) diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp index 72cf8ea162f..dc1a334da26 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr { +namespace experimental { // returns range: [(rank, element) ...] auto ranked_view(const dr::distributed_range auto &r) { @@ -15,4 +15,4 @@ auto ranked_view(const dr::distributed_range auto &r) { return rng::views::zip(rng::views::transform(r, rank), r); } -} // namespace dr +} // namespace experimental diff --git a/test/distributed-ranges/shp/CMakeLists.txt b/test/distributed-ranges/shp/CMakeLists.txt index 17fa366ef16..0325a4dd7f6 100644 --- a/test/distributed-ranges/shp/CMakeLists.txt +++ b/test/distributed-ranges/shp/CMakeLists.txt @@ -54,12 +54,8 @@ if($(HIPSYCL_TARGETS)) target_compile_options(dr_shp INTERFACE --hipsycl-targets='cuda:sm_75') endif() - set(CMAKE_INCLUDE_CURRENT_DIR ON) - - - add_executable( shp-tests shp-tests.cpp ../common/all.cpp ../common/copy.cpp ../common/counted.cpp @@ -74,10 +70,11 @@ add_executable( add_executable(shp-tests-3 shp-tests.cpp containers-3.cpp copy-3.cpp) # skeleton for rapid builds of individual tests, feel free to change this -add_executable(shp-quick-test shp-tests.cpp ../common/transform.cpp) -target_compile_definitions(shp-quick-test PRIVATE QUICK_TEST) +# add_executable(shp-quick-test shp-tests.cpp ../common/transform.cpp) +# target_compile_definitions(shp-quick-test PRIVATE QUICK_TEST) -foreach(test-exec IN ITEMS shp-tests shp-tests-3 shp-quick-test) +#foreach(test-exec IN ITEMS shp-tests shp-tests-3 shp-quick-test) +foreach(test-exec IN ITEMS shp-tests shp-tests-3) target_link_libraries(${test-exec} GTest::gtest_main DR::shp fmt::fmt cxxopts) endforeach() From 9e973058b9fcbaf953ea928a1b645a9b6a4480dc Mon Sep 17 00:00:00 2001 From: "Mateusz P. Nowak" Date: Thu, 21 Mar 2024 14:18:01 +0000 Subject: [PATCH 08/29] namespace update --- .../concepts/concepts.hpp | 27 ++-- .../detail/enumerate.hpp | 4 +- .../distributed_ranges_impl/detail/index.hpp | 10 +- .../detail/iterator_adaptor.hpp | 4 +- .../distributed_ranges_impl/detail/logger.hpp | 6 +- .../detail/onedpl_direct_iterator.hpp | 4 +- .../detail/owning_view.hpp | 4 +- .../distributed_ranges_impl/detail/ranges.hpp | 10 +- .../detail/remote_subrange.hpp | 12 +- .../detail/segments_tools.hpp | 40 +++--- .../detail/sycl_utils.hpp | 8 +- .../distributed_ranges_impl/detail/utils.hpp | 4 +- .../detail/view_detectors.hpp | 4 +- .../shp/algorithms/copy.hpp | 50 +++---- .../shp/algorithms/exclusive_scan.hpp | 78 +++++------ .../shp/algorithms/execution_policy.hpp | 4 +- .../shp/algorithms/fill.hpp | 30 ++-- .../shp/algorithms/for_each.hpp | 22 +-- .../shp/algorithms/inclusive_scan.hpp | 100 +++++++------- .../shp/algorithms/iota.hpp | 8 +- .../shp/algorithms/matrix/gemm.hpp | 26 ++-- .../shp/algorithms/matrix/gemv.hpp | 80 +++++------ .../shp/algorithms/matrix/local_gemm.hpp | 4 +- .../shp/algorithms/matrix/local_gemv.hpp | 10 +- .../shp/algorithms/reduce.hpp | 56 ++++---- .../shp/algorithms/sort.hpp | 58 ++++---- .../shp/algorithms/transform.hpp | 28 ++-- .../shp/allocators.hpp | 4 +- .../shp/containers/detail.hpp | 4 +- .../containers/distributed_dense_matrix.hpp | 54 ++++---- .../shp/containers/duplicated_vector.hpp | 12 +- .../shp/containers/matrix_entry.hpp | 54 ++++---- .../shp/containers/matrix_partition.hpp | 46 +++---- .../containers/sequential/dense_matrix.hpp | 8 +- .../shp/containers/sparse_matrix.hpp | 70 +++++----- .../distributed_ranges_impl/shp/detail.hpp | 6 +- .../shp/device_ptr.hpp | 4 +- .../shp/device_ref.hpp | 8 +- .../shp/device_span.hpp | 18 +-- .../shp/device_vector.hpp | 8 +- .../shp/distributed_span.hpp | 24 ++-- .../shp/distributed_vector.hpp | 34 ++--- .../distributed_ranges_impl/shp/future.hpp | 4 +- .../distributed_ranges_impl/shp/init.hpp | 4 +- .../distributed_ranges_impl/shp/range.hpp | 12 +- .../shp/range_adaptors.hpp | 6 +- .../distributed_ranges_impl/shp/span.hpp | 6 +- .../distributed_ranges_impl/shp/util.hpp | 16 +-- .../shp/util/coo_matrix.hpp | 20 +-- .../shp/util/generate_random.hpp | 6 +- .../shp/util/matrix_io.hpp | 26 ++-- .../distributed_ranges_impl/shp/vector.hpp | 4 +- .../shp/views/csr_matrix_view.hpp | 16 +-- .../shp/views/dense_column_view.hpp | 14 +- .../shp/views/dense_matrix_iterator.hpp | 12 +- .../shp/views/dense_matrix_view.hpp | 12 +- .../shp/views/dense_row_view.hpp | 14 +- .../shp/views/enumerate.hpp | 6 +- .../shp/views/standard_views.hpp | 16 +-- .../shp/views/views.hpp | 8 +- .../distributed_ranges_impl/shp/zip_view.hpp | 46 +++---- .../distributed_ranges_impl/views/iota.hpp | 4 +- .../views/transform.hpp | 20 +-- .../distributed_ranges_impl/views/views.hpp | 8 +- test/distributed-ranges/common/counted.cpp | 26 ++-- .../common/distributed_vector.cpp | 4 +- test/distributed-ranges/common/drop.cpp | 30 ++-- test/distributed-ranges/common/iota_view.cpp | 8 +- test/distributed-ranges/common/sycl_utils.cpp | 6 +- test/distributed-ranges/common/take.cpp | 24 ++-- test/distributed-ranges/common/zip.cpp | 8 +- .../include/common-tests.hpp | 14 +- test/distributed-ranges/shp/CMakeLists.txt | 15 +- test/distributed-ranges/shp/algorithms.cpp | 130 +++++++++--------- test/distributed-ranges/shp/containers-3.cpp | 4 +- test/distributed-ranges/shp/containers.cpp | 8 +- test/distributed-ranges/shp/containers.hpp | 2 +- test/distributed-ranges/shp/copy-3.cpp | 8 +- test/distributed-ranges/shp/copy.cpp | 32 ++--- test/distributed-ranges/shp/copy.hpp | 2 +- test/distributed-ranges/shp/detail.cpp | 4 +- test/distributed-ranges/shp/fill.cpp | 4 +- test/distributed-ranges/shp/gemv.cpp | 14 +- test/distributed-ranges/shp/shp-tests.cpp | 4 +- test/distributed-ranges/shp/transform.cpp | 32 ++--- test/distributed-ranges/shp/xhp-tests.hpp | 8 +- 86 files changed, 855 insertions(+), 857 deletions(-) diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp index 0894f710336..c2bd549ba1e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/concepts/concepts.hpp @@ -6,54 +6,55 @@ #include -namespace dr { +namespace experimental::dr +{ template concept remote_iterator = - std::forward_iterator && requires(I &iter) { dr::ranges::rank(iter); }; + std::forward_iterator && requires(I &iter) { experimental::dr::ranges::rank(iter); }; template concept remote_range = - rng::forward_range && requires(R &r) { dr::ranges::rank(r); }; + rng::forward_range && requires(R &r) { experimental::dr::ranges::rank(r); }; template concept distributed_range = - rng::forward_range && requires(R &r) { dr::ranges::segments(r); }; + rng::forward_range && requires(R &r) { experimental::dr::ranges::segments(r); }; template concept remote_contiguous_iterator = std::random_access_iterator && requires(I &iter) { - dr::ranges::rank(iter); - { dr::ranges::local(iter) } -> std::contiguous_iterator; + experimental::dr::ranges::rank(iter); + { experimental::dr::ranges::local(iter) } -> std::contiguous_iterator; }; template concept distributed_iterator = std::forward_iterator && requires(I &iter) { - dr::ranges::segments(iter); + experimental::dr::ranges::segments(iter); }; template concept remote_contiguous_range = remote_range && rng::random_access_range && requires(R &r) { - { dr::ranges::local(r) } -> rng::contiguous_range; + { experimental::dr::ranges::local(r) } -> rng::contiguous_range; }; template concept distributed_contiguous_range = distributed_range && rng::random_access_range && requires(R &r) { - { dr::ranges::segments(r) } -> rng::random_access_range; + { experimental::dr::ranges::segments(r) } -> rng::random_access_range; } && remote_contiguous_range< - rng::range_value_t()))>>; + rng::range_value_t()))>>; template concept distributed_contiguous_iterator = distributed_iterator && rng::random_access_iterator && requires(Iter &iter) { - { dr::ranges::segments(iter) } -> rng::random_access_range; + { experimental::dr::ranges::segments(iter) } -> rng::random_access_range; } && - remote_contiguous_range()))>>; -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp index a7ac841052c..a9fd556a1c5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp @@ -6,7 +6,7 @@ #include -namespace dr { +namespace experimental::dr { namespace __detail { @@ -59,4 +59,4 @@ inline constexpr auto enumerate = enumerate_fn_{}; } // namespace __detail -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp index a317e97bcf9..a9d072c35c2 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr { +namespace experimental::dr { namespace { template @@ -96,19 +96,19 @@ template class index { index_type second; }; -} // namespace dr +} // namespace experimental::dr namespace std { template -struct tuple_element> +struct tuple_element> : tuple_element> {}; template -struct tuple_size> : integral_constant {}; +struct tuple_size> : integral_constant {}; template -inline constexpr I get(dr::index index) +inline constexpr I get(experimental::dr::index index) requires(Index <= 1) { if constexpr (Index == 0) { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp index bc6a63b8db9..5cdc1eb36a5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp @@ -9,7 +9,7 @@ #include -namespace dr { +namespace experimental::dr { namespace { @@ -197,4 +197,4 @@ template class iterator_adaptor { accessor_type accessor_; }; -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp index 50642205c57..2b342936425 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp @@ -13,7 +13,7 @@ #include "format_shim.hpp" #include "ranges_shim.hpp" -namespace dr { +namespace experimental::dr { class timer { public: @@ -109,6 +109,6 @@ class logger { inline logger drlog; #define DRLOG(...) \ - dr::drlog.debug(nostd::source_location::current(), __VA_ARGS__) + experimental::dr::drlog.debug(nostd::source_location::current(), __VA_ARGS__) -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp index 86c572ef200..ec5892ecb7e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp @@ -8,7 +8,7 @@ #include -namespace dr { +namespace experimental::dr { namespace __detail { @@ -108,4 +108,4 @@ template class direct_iterator { } // namespace __detail -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp index d6027c6823b..43074a1f966 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp @@ -6,7 +6,7 @@ #include -namespace dr { +namespace experimental::dr { namespace __detail { @@ -66,4 +66,4 @@ class owning_view : public rng::view_interface> { } // namespace __detail -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp index 11fa8d5c524..4e6060c454c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/ranges.hpp @@ -10,7 +10,7 @@ #include -namespace dr { +namespace experimental::dr { namespace ranges { @@ -80,7 +80,7 @@ namespace { template concept remote_range_shadow_impl_ = - rng::forward_range && requires(R &r) { dr::ranges::rank(r); }; + rng::forward_range && requires(R &r) { experimental::dr::ranges::rank(r); }; template concept segments_range = @@ -240,14 +240,14 @@ namespace __detail { template concept has_local = requires(T &t) { - { dr::ranges::local(t) } -> std::convertible_to; + { experimental::dr::ranges::local(t) } -> std::convertible_to; }; struct local_fn_ { template requires(has_local) auto operator()(T &&t) const { - return dr::ranges::local(t); + return experimental::dr::ranges::local(t); } template decltype(auto) operator()(T &&t) const { return t; } @@ -259,4 +259,4 @@ inline constexpr auto local = local_fn_{}; } // namespace ranges -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp index 50147347693..7b268ef720b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr { +namespace experimental::dr { template class remote_subrange : public rng::subrange { @@ -27,9 +27,9 @@ class remote_subrange : public rng::subrange { constexpr remote_subrange(R &&r, std::size_t rank) : base(rng::begin(r), rng::end(r)), rank_(rank) {} - template + template constexpr remote_subrange(R &&r) - : base(rng::begin(r), rng::end(r)), rank_(dr::ranges::rank(r)) {} + : base(rng::begin(r), rng::end(r)), rank_(experimental::dr::ranges::rank(r)) {} constexpr std::size_t rank() const noexcept { return rank_; } @@ -40,15 +40,15 @@ class remote_subrange : public rng::subrange { template remote_subrange(R &&, std::size_t) -> remote_subrange>; -template +template remote_subrange(R &&) -> remote_subrange>; -} // namespace dr +} // namespace experimental::dr #if !defined(DR_SPEC) // Needed to satisfy concepts for rng::begin template -inline constexpr bool rng::enable_borrowed_range> = true; +inline constexpr bool rng::enable_borrowed_range> = true; #endif diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp index 59fce0dd761..73c3bfdb2f4 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp @@ -10,7 +10,7 @@ #include #include -namespace dr { +namespace experimental::dr { namespace __detail { @@ -26,9 +26,9 @@ auto take_segments(R &&segments, std::size_t last_seg, std::size_t local_id) { auto first = rng::begin(segment); auto last = rng::begin(segment); rng::advance(last, remainder); - return dr::remote_subrange(first, last, dr::ranges::rank(segment)); + return experimental::dr::remote_subrange(first, last, experimental::dr::ranges::rank(segment)); } else { - return dr::remote_subrange(segment); + return experimental::dr::remote_subrange(segment); } }; @@ -63,9 +63,9 @@ auto drop_segments(R &&segments, std::size_t first_seg, std::size_t local_id) { auto first = rng::begin(segment); rng::advance(first, remainder); auto last = rng::end(segment); - return dr::remote_subrange(first, last, dr::ranges::rank(segment)); + return experimental::dr::remote_subrange(first, last, experimental::dr::ranges::rank(segment)); } else { - return dr::remote_subrange(segment); + return experimental::dr::remote_subrange(segment); } }; @@ -91,49 +91,49 @@ template auto drop_segments(R &&segments, std::size_t n) { } // namespace __detail -} // namespace dr +} // namespace experimental::dr namespace DR_RANGES_NAMESPACE { // A standard library range adaptor does not change the rank of a // remote range, so we can simply return the rank of the base view. template - requires(dr::remote_range().base())>) + requires(experimental::dr::remote_range().base())>) auto rank_(V &&v) { - return dr::ranges::rank(std::forward(v).base()); + return experimental::dr::ranges::rank(std::forward(v).base()); } template - requires(dr::is_ref_view_v> && - dr::distributed_range().base())>) + requires(experimental::dr::is_ref_view_v> && + experimental::dr::distributed_range().base())>) auto segments_(V &&v) { - return dr::ranges::segments(v.base()); + return experimental::dr::ranges::segments(v.base()); } template - requires(dr::is_take_view_v> && - dr::distributed_range().base())>) + requires(experimental::dr::is_take_view_v> && + experimental::dr::distributed_range().base())>) auto segments_(V &&v) { - return dr::__detail::take_segments(dr::ranges::segments(v.base()), v.size()); + return experimental::dr::__detail::take_segments(experimental::dr::ranges::segments(v.base()), v.size()); } template - requires(dr::is_drop_view_v> && - dr::distributed_range().base())>) + requires(experimental::dr::is_drop_view_v> && + experimental::dr::distributed_range().base())>) auto segments_(V &&v) { - return dr::__detail::drop_segments(dr::ranges::segments(v.base()), + return experimental::dr::__detail::drop_segments(experimental::dr::ranges::segments(v.base()), v.base().size() - v.size()); } template - requires(dr::is_subrange_view_v> && - dr::distributed_iterator>) + requires(experimental::dr::is_subrange_view_v> && + experimental::dr::distributed_iterator>) auto segments_(V &&v) { auto first = rng::begin(v); auto last = rng::end(v); auto size = rng::distance(first, last); - return dr::__detail::take_segments(dr::ranges::segments(first), size); + return experimental::dr::__detail::take_segments(experimental::dr::ranges::segments(first), size); } } // namespace DR_RANGES_NAMESPACE diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp index 143f3e8612e..177c785b09e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp @@ -12,7 +12,7 @@ #include -namespace dr::__detail { +namespace experimental::dr::__detail { // With the ND-range workaround, the maximum kernel size is // `std::numeric_limits::max()` rounded down to @@ -191,17 +191,17 @@ sycl::event parallel_for(sycl::queue &q, sycl::range<3> global, Fn &&fn) { using event = sycl::event; -} // namespace dr::__detail +} // namespace experimental::dr::__detail #else -namespace dr::__detail { +namespace experimental::dr::__detail { class event { public: void wait() {} }; -} // namespace dr::__detail +} // namespace experimental::dr::__detail #endif // SYCL_LANGUAGE_VERSION diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp index b972153c11d..357bdfd8665 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp @@ -4,7 +4,7 @@ #pragma once -namespace dr::__detail { +namespace experimental::dr::__detail { inline std::size_t round_up(std::size_t n, std::size_t multiple) { if (multiple == 0) { @@ -27,4 +27,4 @@ inline std::size_t partition_up(std::size_t n, std::size_t multiple) { return round_up(n, multiple) / multiple; } -} // namespace dr::__detail +} // namespace experimental::dr::__detail diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp index edd50c285cc..4b39ab5adda 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp @@ -6,7 +6,7 @@ #include -namespace dr { +namespace experimental::dr { template struct is_ref_view : std::false_type {}; template @@ -56,4 +56,4 @@ struct is_zip_view> : std::true_type {}; template inline constexpr bool is_zip_view_v = is_zip_view::value; -} // namespace dr +} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp index ebdc6425da3..c909c60c54c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/copy.hpp @@ -15,14 +15,14 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { // Copy between contiguous ranges template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { - // auto &&q = dr::shp::__detail::default_queue(); + // auto &&q = experimental::dr::shp::__detail::default_queue(); auto &&q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(std::to_address(d_first), std::to_address(first), sizeof(std::iter_value_t) * (last - first)); @@ -41,7 +41,7 @@ OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { template requires __detail::is_syclmemcopyable, T> sycl::event copy_async(Iter first, Iter last, device_ptr d_first) { - // auto &&q = dr::shp::__detail::default_queue(); + // auto &&q = experimental::dr::shp::__detail::default_queue(); auto &&q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(d_first.get_raw_pointer(), std::to_address(first), sizeof(T) * (last - first)); @@ -58,7 +58,7 @@ device_ptr copy(Iter first, Iter last, device_ptr d_first) { template requires __detail::is_syclmemcopyable> sycl::event copy_async(device_ptr first, device_ptr last, Iter d_first) { - // auto &&q = dr::shp::__detail::default_queue(); + // auto &&q = experimental::dr::shp::__detail::default_queue(); auto &&q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(std::to_address(d_first), first.get_raw_pointer(), sizeof(T) * (last - first)); @@ -77,7 +77,7 @@ template sycl::event copy_async(device_ptr> first, device_ptr> last, device_ptr d_first) { - // auto &&q = dr::shp::__detail::default_queue(); + // auto &&q = experimental::dr::shp::__detail::default_queue(); auto &&q = __detail::get_queue_for_pointers(first, d_first); return q.memcpy(d_first.get_raw_pointer(), first.get_raw_pointer(), sizeof(T) * (last - first)); @@ -102,11 +102,11 @@ device_ptr copy(device_ptr> first, } // Copy from local range to distributed range -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { - auto &&segments = dr::ranges::segments(d_first); + auto &&segments = experimental::dr::ranges::segments(d_first); auto segment_iter = rng::begin(segments); std::vector events; @@ -121,24 +121,24 @@ sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { rng::advance(local_last, n_to_copy); events.emplace_back( - dr::shp::copy_async(first, local_last, rng::begin(segment))); + experimental::dr::shp::copy_async(first, local_last, rng::begin(segment))); ++segment_iter; rng::advance(first, n_to_copy); } - return dr::shp::__detail::combine_events(events); + return experimental::dr::shp::__detail::combine_events(events); } -auto copy(rng::contiguous_range auto r, dr::distributed_iterator auto d_first) { +auto copy(rng::contiguous_range auto r, experimental::dr::distributed_iterator auto d_first) { return copy(rng::begin(r), rng::end(r), d_first); } -auto copy(dr::distributed_range auto r, std::contiguous_iterator auto d_first) { +auto copy(experimental::dr::distributed_range auto r, std::contiguous_iterator auto d_first) { return copy(rng::begin(r), rng::end(r), d_first); } -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { @@ -147,13 +147,13 @@ OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { } // Copy from distributed range to local range -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { auto dist = rng::distance(first, last); auto segments = - dr::__detail::take_segments(dr::ranges::segments(first), dist); + experimental::dr::__detail::take_segments(experimental::dr::ranges::segments(first), dist); std::vector events; @@ -161,15 +161,15 @@ sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { auto size = rng::distance(segment); events.emplace_back( - dr::shp::copy_async(rng::begin(segment), rng::end(segment), d_first)); + experimental::dr::shp::copy_async(rng::begin(segment), rng::end(segment), d_first)); rng::advance(d_first, size); } - return dr::shp::__detail::combine_events(events); + return experimental::dr::shp::__detail::combine_events(events); } -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { @@ -178,13 +178,13 @@ OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { } // Copy from distributed range to distributed range -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { auto dist = rng::distance(first, last); auto segments = - dr::__detail::take_segments(dr::ranges::segments(first), dist); + experimental::dr::__detail::take_segments(experimental::dr::ranges::segments(first), dist); std::vector events; @@ -192,15 +192,15 @@ sycl::event copy_async(InputIt first, InputIt last, OutputIt d_first) { auto size = rng::distance(segment); events.emplace_back( - dr::shp::copy_async(rng::begin(segment), rng::end(segment), d_first)); + experimental::dr::shp::copy_async(rng::begin(segment), rng::end(segment), d_first)); rng::advance(d_first, size); } - return dr::shp::__detail::combine_events(events); + return experimental::dr::shp::__detail::combine_events(events); } -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { @@ -211,18 +211,18 @@ OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { // Ranges versions // Distributed to distributed -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> sycl::event copy_async(R &&r, O result) { return copy_async(rng::begin(r), rng::end(r), result); } -template +template requires __detail::is_syclmemcopyable, std::iter_value_t> O copy(R &&r, O result) { return copy(rng::begin(r), rng::end(r), result); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp index 0ae647b29c8..34e59dae3ec 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/exclusive_scan.hpp @@ -20,10 +20,10 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { -template +template void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, BinaryOp &&binary_op) { using T = rng::range_value_t; @@ -31,7 +31,7 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, static_assert( std::is_same_v, device_policy>); - auto zipped_view = dr::shp::views::zip(r, o); + auto zipped_view = experimental::dr::shp::views::zip(r, o); auto zipped_segments = zipped_view.zipped_segments(); if constexpr (std::is_same_v, @@ -49,7 +49,7 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, auto last_element = rng::prev(rng::end(__detail::local(in_segment))); auto dest = d_inits + segment_id; - auto &&q = __detail::queue(dr::ranges::rank(in_segment)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(in_segment)); auto e = q.single_task([=] { *dest = *last_element; }); events.push_back(e); @@ -67,17 +67,17 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, inits[0] = init; - auto root = dr::shp::devices()[0]; - dr::shp::device_allocator allocator(dr::shp::context(), root); - dr::shp::vector> partial_sums( + auto root = experimental::dr::shp::devices()[0]; + experimental::dr::shp::device_allocator allocator(experimental::dr::shp::context(), root); + experimental::dr::shp::vector> partial_sums( std::size_t(zipped_segments.size()), allocator); segment_id = 0; for (auto &&segs : zipped_segments) { auto &&[in_segment, out_segment] = segs; - auto &&q = __detail::queue(dr::ranges::rank(in_segment)); - auto &&local_policy = __detail::dpl_policy(dr::ranges::rank(in_segment)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(in_segment)); + auto &&local_policy = __detail::dpl_policy(experimental::dr::ranges::rank(in_segment)); auto dist = rng::distance(in_segment); assert(dist > 0); @@ -89,13 +89,13 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, auto init = inits[segment_id]; auto event = oneapi::dpl::experimental::exclusive_scan_async( - local_policy, dr::__detail::direct_iterator(first), - dr::__detail::direct_iterator(last), - dr::__detail::direct_iterator(d_first), init, binary_op); + local_policy, experimental::dr::__detail::direct_iterator(first), + experimental::dr::__detail::direct_iterator(last), + experimental::dr::__detail::direct_iterator(d_first), init, binary_op); - auto dst_iter = dr::ranges::local(partial_sums).data() + segment_id; + auto dst_iter = experimental::dr::ranges::local(partial_sums).data() + segment_id; - auto src_iter = dr::ranges::local(out_segment).data(); + auto src_iter = experimental::dr::ranges::local(out_segment).data(); rng::advance(src_iter, dist - 1); auto e = q.submit([&](auto &&h) { @@ -116,7 +116,7 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, auto &&local_policy = __detail::dpl_policy(0); - auto first = dr::ranges::local(partial_sums).data(); + auto first = experimental::dr::ranges::local(partial_sums).data(); auto last = first + partial_sums.size(); oneapi::dpl::experimental::inclusive_scan_async(local_policy, first, last, @@ -128,15 +128,15 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, auto &&[in_segment, out_segment] = segs; if (idx > 0) { - auto &&q = __detail::queue(dr::ranges::rank(out_segment)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(out_segment)); auto first = rng::begin(out_segment); - dr::__detail::direct_iterator d_first(first); + experimental::dr::__detail::direct_iterator d_first(first); auto d_sum = - dr::ranges::__detail::local(partial_sums).begin() + idx - 1; + experimental::dr::ranges::__detail::local(partial_sums).begin() + idx - 1; - sycl::event e = dr::__detail::parallel_for( + sycl::event e = experimental::dr::__detail::parallel_for( q, sycl::range<>(rng::distance(out_segment)), [=](auto idx) { d_first[idx] = binary_op(d_first[idx], *d_sum); }); @@ -154,8 +154,8 @@ void exclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, U init, // Ranges versions -template +template void exclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, T init, BinaryOp &&binary_op) { exclusive_scan_impl_(std::forward(policy), @@ -163,33 +163,33 @@ void exclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, T init, std::forward(binary_op)); } -template +template void exclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, T init) { exclusive_scan_impl_(std::forward(policy), std::forward(r), std::forward(o), init, std::plus<>{}); } -template +template void exclusive_scan(R &&r, O &&o, T init, BinaryOp &&binary_op) { - exclusive_scan_impl_(dr::shp::par_unseq, std::forward(r), + exclusive_scan_impl_(experimental::dr::shp::par_unseq, std::forward(r), std::forward(o), init, std::forward(binary_op)); } -template +template void exclusive_scan(R &&r, O &&o, T init) { - exclusive_scan_impl_(dr::shp::par_unseq, std::forward(r), + exclusive_scan_impl_(experimental::dr::shp::par_unseq, std::forward(r), std::forward(o), init, std::plus<>{}); } // Iterator versions -template +template void exclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, T init, BinaryOp &&binary_op) { auto dist = rng::distance(first, last); @@ -200,26 +200,26 @@ void exclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, rng::subrange(d_first, d_last), init, std::forward(binary_op)); } -template +template void exclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, T init) { exclusive_scan(std::forward(policy), first, last, d_first, init, std::plus<>{}); } -template void exclusive_scan(Iter first, Iter last, OutputIter d_first, T init, BinaryOp &&binary_op) { - exclusive_scan(dr::shp::par_unseq, first, last, d_first, init, + exclusive_scan(experimental::dr::shp::par_unseq, first, last, d_first, init, std::forward(binary_op)); } -template void exclusive_scan(Iter first, Iter last, OutputIter d_first, T init) { - exclusive_scan(dr::shp::par_unseq, first, last, d_first, init); + exclusive_scan(experimental::dr::shp::par_unseq, first, last, d_first, init); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp index 8bdfa00f651..33de94953f7 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { struct device_policy { device_policy(sycl::device device) : devices_({device}) {} @@ -31,4 +31,4 @@ struct device_policy { std::vector devices_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp index 26226c63f21..fe287091d38 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template requires(!std::is_const_v> && @@ -25,7 +25,7 @@ sycl::event fill_async(Iter first, Iter last, auto &&q = __detail::get_queue_for_pointer(first); std::iter_value_t *arr = std::to_address(first); // not using q.fill because of CMPLRLLVM-46438 - return dr::__detail::parallel_for(q, sycl::range<>(last - first), + return experimental::dr::__detail::parallel_for(q, sycl::range<>(last - first), [=](auto idx) { arr[idx] = value; }); } @@ -42,7 +42,7 @@ sycl::event fill_async(device_ptr first, device_ptr last, auto &&q = __detail::get_queue_for_pointer(first); auto *arr = first.get_raw_pointer(); // not using q.fill because of CMPLRLLVM-46438 - return dr::__detail::parallel_for(q, sycl::range<>(last - first), + return experimental::dr::__detail::parallel_for(q, sycl::range<>(last - first), [=](auto idx) { arr[idx] = value; }); } @@ -52,43 +52,43 @@ void fill(device_ptr first, device_ptr last, const U &value) { fill_async(first, last, value).wait(); } -template +template sycl::event fill_async(R &&r, const T &value) { - auto &&q = __detail::queue(dr::ranges::rank(r)); - auto *arr = std::to_address(rng::begin(dr::ranges::local(r))); + auto &&q = __detail::queue(experimental::dr::ranges::rank(r)); + auto *arr = std::to_address(rng::begin(experimental::dr::ranges::local(r))); // not using q.fill because of CMPLRLLVM-46438 - return dr::__detail::parallel_for(q, sycl::range<>(rng::distance(r)), + return experimental::dr::__detail::parallel_for(q, sycl::range<>(rng::distance(r)), [=](auto idx) { arr[idx] = value; }); } -template +template auto fill(R &&r, const T &value) { fill_async(r, value).wait(); return rng::end(r); } -template +template sycl::event fill_async(DR &&r, const T &value) { std::vector events; - for (auto &&segment : dr::ranges::segments(r)) { - auto e = dr::shp::fill_async(segment, value); + for (auto &&segment : experimental::dr::ranges::segments(r)) { + auto e = experimental::dr::shp::fill_async(segment, value); events.push_back(e); } - return dr::shp::__detail::combine_events(events); + return experimental::dr::shp::__detail::combine_events(events); } -template +template auto fill(DR &&r, const T &value) { fill_async(r, value).wait(); return rng::end(r); } -template +template auto fill(Iter first, Iter last, const T &value) { fill_async(rng::subrange(first, last), value).wait(); return last; } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp index 0266bdea344..1135740f933 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp @@ -13,17 +13,17 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { -template +template void for_each(ExecutionPolicy &&policy, R &&r, Fn &&fn) { static_assert( // currently only one policy supported std::is_same_v, device_policy>); std::vector events; - for (auto &&segment : dr::ranges::segments(r)) { - auto &&q = __detail::queue(dr::ranges::rank(segment)); + for (auto &&segment : experimental::dr::ranges::segments(r)) { + auto &&q = __detail::queue(experimental::dr::ranges::rank(segment)); assert(rng::distance(segment) > 0); @@ -31,7 +31,7 @@ void for_each(ExecutionPolicy &&policy, R &&r, Fn &&fn) { auto first = rng::begin(local_segment); - auto event = dr::__detail::parallel_for( + auto event = experimental::dr::__detail::parallel_for( q, sycl::range<>(rng::distance(local_segment)), [=](auto idx) { fn(*(first + idx)); }); events.emplace_back(event); @@ -39,19 +39,19 @@ void for_each(ExecutionPolicy &&policy, R &&r, Fn &&fn) { __detail::wait(events); } -template +template void for_each(ExecutionPolicy &&policy, Iter begin, Iter end, Fn &&fn) { for_each(std::forward(policy), rng::subrange(begin, end), std::forward(fn)); } -template void for_each(R &&r, Fn &&fn) { - for_each(dr::shp::par_unseq, std::forward(r), std::forward(fn)); +template void for_each(R &&r, Fn &&fn) { + for_each(experimental::dr::shp::par_unseq, std::forward(r), std::forward(fn)); } -template +template void for_each(Iter begin, Iter end, Fn &&fn) { - for_each(dr::shp::par_unseq, begin, end, std::forward(fn)); + for_each(experimental::dr::shp::par_unseq, begin, end, std::forward(fn)); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp index 15fbd9467dc..17c1e4c7c02 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/inclusive_scan.hpp @@ -22,10 +22,10 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { -template > void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, BinaryOp &&binary_op, std::optional init = {}) { @@ -34,7 +34,7 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, static_assert( std::is_same_v, device_policy>); - auto zipped_view = dr::shp::views::zip(r, o); + auto zipped_view = experimental::dr::shp::views::zip(r, o); auto zipped_segments = zipped_view.zipped_segments(); if constexpr (std::is_same_v, @@ -42,17 +42,17 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, std::vector events; - auto root = dr::shp::devices()[0]; - dr::shp::device_allocator allocator(dr::shp::context(), root); - dr::shp::vector> partial_sums( + auto root = experimental::dr::shp::devices()[0]; + experimental::dr::shp::device_allocator allocator(experimental::dr::shp::context(), root); + experimental::dr::shp::vector> partial_sums( std::size_t(zipped_segments.size()), allocator); std::size_t segment_id = 0; for (auto &&segs : zipped_segments) { auto &&[in_segment, out_segment] = segs; - auto &&q = __detail::queue(dr::ranges::rank(in_segment)); - auto &&local_policy = __detail::dpl_policy(dr::ranges::rank(in_segment)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(in_segment)); + auto &&local_policy = __detail::dpl_policy(experimental::dr::ranges::rank(in_segment)); auto dist = rng::distance(in_segment); assert(dist > 0); @@ -65,19 +65,19 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, if (segment_id == 0 && init.has_value()) { event = oneapi::dpl::experimental::inclusive_scan_async( - local_policy, dr::__detail::direct_iterator(first), - dr::__detail::direct_iterator(last), - dr::__detail::direct_iterator(d_first), binary_op, init.value()); + local_policy, experimental::dr::__detail::direct_iterator(first), + experimental::dr::__detail::direct_iterator(last), + experimental::dr::__detail::direct_iterator(d_first), binary_op, init.value()); } else { event = oneapi::dpl::experimental::inclusive_scan_async( - local_policy, dr::__detail::direct_iterator(first), - dr::__detail::direct_iterator(last), - dr::__detail::direct_iterator(d_first), binary_op); + local_policy, experimental::dr::__detail::direct_iterator(first), + experimental::dr::__detail::direct_iterator(last), + experimental::dr::__detail::direct_iterator(d_first), binary_op); } - auto dst_iter = dr::ranges::local(partial_sums).data() + segment_id; + auto dst_iter = experimental::dr::ranges::local(partial_sums).data() + segment_id; - auto src_iter = dr::ranges::local(out_segment).data(); + auto src_iter = experimental::dr::ranges::local(out_segment).data(); rng::advance(src_iter, dist - 1); auto e = q.submit([&](auto &&h) { @@ -98,7 +98,7 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, auto &&local_policy = __detail::dpl_policy(0); - auto first = dr::ranges::local(partial_sums).data(); + auto first = experimental::dr::ranges::local(partial_sums).data(); auto last = first + partial_sums.size(); oneapi::dpl::experimental::inclusive_scan_async(local_policy, first, last, @@ -110,15 +110,15 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, auto &&[in_segment, out_segment] = segs; if (idx > 0) { - auto &&q = __detail::queue(dr::ranges::rank(out_segment)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(out_segment)); auto first = rng::begin(out_segment); - dr::__detail::direct_iterator d_first(first); + experimental::dr::__detail::direct_iterator d_first(first); auto d_sum = - dr::ranges::__detail::local(partial_sums).begin() + idx - 1; + experimental::dr::ranges::__detail::local(partial_sums).begin() + idx - 1; - sycl::event e = dr::__detail::parallel_for( + sycl::event e = experimental::dr::__detail::parallel_for( q, sycl::range<>(rng::distance(out_segment)), [=](auto idx) { d_first[idx] = binary_op(d_first[idx], *d_sum); }); @@ -134,8 +134,8 @@ void inclusive_scan_impl_(ExecutionPolicy &&policy, R &&r, O &&o, } } -template +template void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, BinaryOp &&binary_op, T init) { inclusive_scan_impl_(std::forward(policy), @@ -143,8 +143,8 @@ void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, std::forward(binary_op), std::optional(init)); } -template +template void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, BinaryOp &&binary_op) { inclusive_scan_impl_(std::forward(policy), @@ -152,8 +152,8 @@ void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o, std::forward(binary_op)); } -template +template void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o) { inclusive_scan(std::forward(policy), std::forward(r), std::forward(o), std::plus>()); @@ -161,8 +161,8 @@ void inclusive_scan(ExecutionPolicy &&policy, R &&r, O &&o) { // Distributed iterator versions -template +template OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op, T init) { @@ -176,8 +176,8 @@ OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, return d_last; } -template +template OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op) { @@ -191,8 +191,8 @@ OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, return d_last; } -template +template OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, OutputIter d_first) { auto dist = rng::distance(first, last); @@ -206,47 +206,47 @@ OutputIter inclusive_scan(ExecutionPolicy &&policy, Iter first, Iter last, // Execution policy-less versions -template +template void inclusive_scan(R &&r, O &&o) { - inclusive_scan(dr::shp::par_unseq, std::forward(r), std::forward(o)); + inclusive_scan(experimental::dr::shp::par_unseq, std::forward(r), std::forward(o)); } -template +template void inclusive_scan(R &&r, O &&o, BinaryOp &&binary_op) { - inclusive_scan(dr::shp::par_unseq, std::forward(r), std::forward(o), + inclusive_scan(experimental::dr::shp::par_unseq, std::forward(r), std::forward(o), std::forward(binary_op)); } -template +template void inclusive_scan(R &&r, O &&o, BinaryOp &&binary_op, T init) { - inclusive_scan(dr::shp::par_unseq, std::forward(r), std::forward(o), + inclusive_scan(experimental::dr::shp::par_unseq, std::forward(r), std::forward(o), std::forward(binary_op), init); } // Distributed iterator versions -template +template OutputIter inclusive_scan(Iter first, Iter last, OutputIter d_first) { - return inclusive_scan(dr::shp::par_unseq, first, last, d_first); + return inclusive_scan(experimental::dr::shp::par_unseq, first, last, d_first); } -template OutputIter inclusive_scan(Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op) { - return inclusive_scan(dr::shp::par_unseq, first, last, d_first, + return inclusive_scan(experimental::dr::shp::par_unseq, first, last, d_first, std::forward(binary_op)); } -template OutputIter inclusive_scan(Iter first, Iter last, OutputIter d_first, BinaryOp &&binary_op, T init) { - return inclusive_scan(dr::shp::par_unseq, first, last, d_first, + return inclusive_scan(experimental::dr::shp::par_unseq, first, last, d_first, std::forward(binary_op), init); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp index bfc250abfe6..01ebc163ce6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp @@ -11,9 +11,9 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { -template void iota(R &&r, T value) { +template void iota(R &&r, T value) { auto iota_view = rng::views::iota(value, T(value + rng::distance(r))); for_each(par_unseq, views::zip(iota_view, r), [](auto &&elem) { @@ -22,10 +22,10 @@ template void iota(R &&r, T value) { }); } -template +template void iota(Iter begin, Iter end, T value) { auto r = rng::subrange(begin, end); iota(r, value); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp index e860a5ed5a9..21f5a803e72 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template void gemm(distributed_dense_matrix &a, distributed_dense_matrix &b, @@ -48,7 +48,7 @@ void gemm_inplace(distributed_dense_matrix &a, auto &&a_tile = a.tile({i, k}); auto &&b_tile = b.tile({k, j}); - auto &&q = __detail::queue(dr::ranges::rank(c_tile)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(c_tile)); auto e = __detail::local_gemm(q, __detail::local(a_tile), __detail::local(b_tile), @@ -91,14 +91,14 @@ void gemm_buffered(distributed_dense_matrix &a, auto c_local = c.tile({i, j}); threads.emplace_back([c_local, i, j, &a, &b, &communication, &compute] { - auto &&q = __detail::queue(dr::ranges::rank(c_local)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(c_local)); std::size_t a_elem = a.tile_shape()[0] * a.tile_shape()[1]; std::size_t b_elem = b.tile_shape()[0] * b.tile_shape()[1]; std::size_t buffer_size = std::max(a_elem, b_elem); - dr::shp::device_allocator gpu_allocator(q); - dr::shp::buffered_allocator buffered_allocator(gpu_allocator, + experimental::dr::shp::device_allocator gpu_allocator(q); + experimental::dr::shp::buffered_allocator buffered_allocator(gpu_allocator, buffer_size, 2); auto &&allocator = buffered_allocator; @@ -114,8 +114,8 @@ void gemm_buffered(distributed_dense_matrix &a, double duration = std::chrono::duration(end - begin).count(); communication += duration; - dr::shp::dense_matrix_view a_local(a_tile); - dr::shp::dense_matrix_view b_local(b_tile); + experimental::dr::shp::dense_matrix_view a_local(a_tile); + experimental::dr::shp::dense_matrix_view b_local(b_tile); begin = std::chrono::high_resolution_clock::now(); __detail::local_gemm(q, __detail::local(a_local), @@ -168,14 +168,14 @@ void gemm_buffered_async(distributed_dense_matrix &a, auto c_local = c.tile({i, j}); threads.emplace_back([c_local, i, j, &a, &b, &issue, &sync, &compute] { - auto &&q = __detail::queue(dr::ranges::rank(c_local)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(c_local)); std::size_t a_elem = a.tile_shape()[0] * a.tile_shape()[1]; std::size_t b_elem = b.tile_shape()[0] * b.tile_shape()[1]; std::size_t buffer_size = std::max(a_elem, b_elem); - dr::shp::device_allocator gpu_allocator(q); - dr::shp::buffered_allocator buffered_allocator(gpu_allocator, + experimental::dr::shp::device_allocator gpu_allocator(q); + experimental::dr::shp::buffered_allocator buffered_allocator(gpu_allocator, buffer_size, 4); auto &&allocator = buffered_allocator; @@ -202,8 +202,8 @@ void gemm_buffered_async(distributed_dense_matrix &a, double duration = std::chrono::duration(end - begin).count(); sync += duration; - dr::shp::dense_matrix_view a_local(a_tile); - dr::shp::dense_matrix_view b_local(b_tile); + experimental::dr::shp::dense_matrix_view a_local(a_tile); + experimental::dr::shp::dense_matrix_view b_local(b_tile); if (k_ + 1 < a.grid_shape()[1]) { begin = std::chrono::high_resolution_clock::now(); @@ -242,4 +242,4 @@ void gemm_buffered_async(distributed_dense_matrix &a, } } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp index a4c0842f744..f90aa1a0e41 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp @@ -14,45 +14,45 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { -template -void flat_gemv(C &&c, dr::shp::sparse_matrix &a, B &&b) { +template +void flat_gemv(C &&c, experimental::dr::shp::sparse_matrix &a, B &&b) { assert(c.size() == b.size()); assert(a.shape()[1] == b.size()); assert(a.grid_shape()[0] == c.segments().size()); assert(a.grid_shape()[1] == 1); - auto &&devices = dr::shp::devices(); + auto &&devices = experimental::dr::shp::devices(); using b_scalar_type = rng::range_value_t; using local_vector_type = - dr::shp::device_vector>; + experimental::dr::shp::device_vector>; std::vector local_b; std::vector copy_events; std::vector comp_events; for (std::size_t i = 0; i < devices.size(); i++) { - dr::shp::device_allocator allocator(dr::shp::context(), devices[i]); + experimental::dr::shp::device_allocator allocator(experimental::dr::shp::context(), devices[i]); local_b.push_back(local_vector_type(b.size(), allocator, i)); } for (auto &&l_b : local_b) { auto event = - dr::shp::copy_async(b.begin(), b.end(), dr::ranges::local(l_b.begin())); + experimental::dr::shp::copy_async(b.begin(), b.end(), experimental::dr::ranges::local(l_b.begin())); copy_events.push_back(event); } for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - auto a_tile = a.tile(dr::index(i, 0)); + auto a_tile = a.tile(experimental::dr::index(i, 0)); auto a_iter = a_tile.begin(); - auto b_iter = dr::ranges::local(local_b[i].begin()); - auto c_iter = dr::ranges::local(c.segments()[i].begin()); + auto b_iter = experimental::dr::ranges::local(local_b[i].begin()); + auto c_iter = experimental::dr::ranges::local(c.segments()[i].begin()); auto &&q = __detail::queue(a_tile.rank()); @@ -75,9 +75,9 @@ void flat_gemv(C &&c, dr::shp::sparse_matrix &a, B &&b) { __detail::wait(comp_events); } -template -void gemv(C &&c, dr::shp::sparse_matrix &a, B &&b, +template +void gemv(C &&c, experimental::dr::shp::sparse_matrix &a, B &&b, shp::duplicated_vector> &scratch) { assert(c.size() == b.size()); assert(a.shape()[1] == b.size()); @@ -93,16 +93,16 @@ void gemv(C &&c, dr::shp::sparse_matrix &a, B &&b, for (std::size_t i = 0; i < shp::nprocs(); i++) { auto &&l_b = b_duplicated.local_vector(i); - auto event = dr::shp::copy_async(b.begin(), b.end(), l_b.begin()); + auto event = experimental::dr::shp::copy_async(b.begin(), b.end(), l_b.begin()); copy_events.push_back(event); } for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - auto a_tile = a.tile(dr::index(i, 0)); + auto a_tile = a.tile(experimental::dr::index(i, 0)); auto b_iter = - dr::ranges::local(b_duplicated.local_vector(a_tile.rank()).begin()); - auto c_iter = dr::ranges::local(c.segments()[i].begin()); + experimental::dr::ranges::local(b_duplicated.local_vector(a_tile.rank()).begin()); + auto c_iter = experimental::dr::ranges::local(c.segments()[i].begin()); auto &&q = __detail::queue(a_tile.rank()); @@ -114,17 +114,17 @@ void gemv(C &&c, dr::shp::sparse_matrix &a, B &&b, __detail::wait(comp_events); } -template -void gemv(C &&c, dr::shp::sparse_matrix &a, B &&b) { - dr::shp::duplicated_vector> b_duplicated(b.size()); +template +void gemv(C &&c, experimental::dr::shp::sparse_matrix &a, B &&b) { + experimental::dr::shp::duplicated_vector> b_duplicated(b.size()); gemv(c, a, b, b_duplicated); } -template -void gemv_square(C &&c, dr::shp::sparse_matrix &a, B &&b) { +template +void gemv_square(C &&c, experimental::dr::shp::sparse_matrix &a, B &&b) { assert(a.shape()[0] == c.size()); assert(a.shape()[1] == b.size()); assert(a.grid_shape()[0] == c.segments().size()); @@ -136,12 +136,12 @@ void gemv_square(C &&c, dr::shp::sparse_matrix &a, B &&b) { std::size_t k_offset = i; for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - auto a_tile = a.tile(dr::index(i, k)); + auto a_tile = a.tile(experimental::dr::index(i, k)); auto b_segment = b.segments()[k]; auto c_segment = c.segments()[i]; - auto b_iter = dr::ranges::local(b_segment.begin()); - auto c_iter = dr::ranges::local(c_segment.begin()); + auto b_iter = experimental::dr::ranges::local(b_segment.begin()); + auto c_iter = experimental::dr::ranges::local(c_segment.begin()); auto &&q = __detail::queue(a_tile.rank()); @@ -153,21 +153,21 @@ void gemv_square(C &&c, dr::shp::sparse_matrix &a, B &&b) { __detail::wait(events); } -template -void gemv_square_copy(C &&c, dr::shp::sparse_matrix &a, B &&b) { +template +void gemv_square_copy(C &&c, experimental::dr::shp::sparse_matrix &a, B &&b) { assert(a.shape()[0] == c.size()); assert(a.shape()[1] == b.size()); assert(a.grid_shape()[0] == c.segments().size()); assert(a.grid_shape()[1] == b.segments().size()); - auto &&devices = dr::shp::devices(); + auto &&devices = experimental::dr::shp::devices(); using b_scalar_type = rng::range_value_t; using local_vector_type = - dr::shp::device_vector>; + experimental::dr::shp::device_vector>; std::vector local_b; std::vector events; @@ -175,10 +175,10 @@ void gemv_square_copy(C &&c, dr::shp::sparse_matrix &a, B &&b) { local_b.reserve(a.grid_shape()[0]); for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - dr::shp::device_allocator allocator( - dr::shp::context(), devices[a.tile(dr::index(i, 0)).rank()]); + experimental::dr::shp::device_allocator allocator( + experimental::dr::shp::context(), devices[a.tile(experimental::dr::index(i, 0)).rank()]); local_b.emplace_back(b.size(), allocator, - a.tile(dr::index(i, 0)).rank()); + a.tile(experimental::dr::index(i, 0)).rank()); } for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { @@ -193,7 +193,7 @@ void gemv_square_copy(C &&c, dr::shp::sparse_matrix &a, B &&b) { auto &&q = __detail::queue(a_tile.rank()); auto ce = - dr::shp::copy_async(q, b_segment.begin(), b_segment.end(), b_iter); + experimental::dr::shp::copy_async(q, b_segment.begin(), b_segment.end(), b_iter); auto event = __detail::custom_gemv(q, a_tile, b_iter.local(), c_iter.local(), {ce}); @@ -205,4 +205,4 @@ void gemv_square_copy(C &&c, dr::shp::sparse_matrix &a, B &&b) { __detail::wait(events); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp index b7cd17dcc11..f124d2f2231 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp @@ -10,7 +10,7 @@ #include #endif -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { @@ -81,4 +81,4 @@ auto local_gemm(sycl::queue &q, shp::dense_matrix_view a, } // namespace __detail -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp index 142792ecfde..a6e0d4cf59e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp @@ -12,7 +12,7 @@ #include #endif -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { @@ -61,9 +61,9 @@ auto mkl_gemv(sycl::queue &q, csr_matrix_view a, Iter b, Iter c, oneapi::mkl::sparse::matrix_handle_t a_handle; oneapi::mkl::sparse::init_matrix_handle(&a_handle); - auto rowptr = dr::shp::__detail::local(a.rowptr_data()); - auto colind = dr::shp::__detail::local(a.colind_data()); - auto values = dr::shp::__detail::local(a.values_data()); + auto rowptr = experimental::dr::shp::__detail::local(a.rowptr_data()); + auto colind = experimental::dr::shp::__detail::local(a.colind_data()); + auto values = experimental::dr::shp::__detail::local(a.values_data()); oneapi::mkl::sparse::set_csr_data(q, a_handle, a.shape()[0], a.shape()[1], oneapi::mkl::index_base::zero, rowptr, @@ -97,4 +97,4 @@ auto local_gemv(sycl::queue &q, csr_matrix_view a, Iter b, } // namespace __detail -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp index 7bfd00eb178..96f1f4c91dd 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/reduce.hpp @@ -28,8 +28,8 @@ auto reduce_no_init_async(ExecutionPolicy &&policy, Iter first, Iter last, std::iter_value_t init = *new_last; - dr::__detail::direct_iterator d_first(first); - dr::__detail::direct_iterator d_last(new_last); + experimental::dr::__detail::direct_iterator d_first(first); + experimental::dr::__detail::direct_iterator d_last(new_last); return oneapi::dpl::experimental::reduce_async( std::forward(policy), d_first, d_last, @@ -41,8 +41,8 @@ template ) auto reduce_no_init_async(ExecutionPolicy &&policy, Iter first, Iter last, Fn &&fn) { - dr::__detail::direct_iterator d_first(first); - dr::__detail::direct_iterator d_last(last); + experimental::dr::__detail::direct_iterator d_first(first); + experimental::dr::__detail::direct_iterator d_last(last); return oneapi::dpl::experimental::reduce_async( std::forward(policy), d_first, d_last, @@ -51,9 +51,9 @@ auto reduce_no_init_async(ExecutionPolicy &&policy, Iter first, Iter last, } // namespace -namespace dr::shp { +namespace experimental::dr::shp { -template T reduce(ExecutionPolicy &&policy, R &&r, T init, BinaryOp &&binary_op) { @@ -63,13 +63,13 @@ T reduce(ExecutionPolicy &&policy, R &&r, T init, BinaryOp &&binary_op) { if constexpr (std::is_same_v, device_policy>) { using future_t = decltype(oneapi::dpl::experimental::reduce_async( - __detail::dpl_policy(0), dr::ranges::segments(r)[0].begin(), - dr::ranges::segments(r)[0].end(), init, binary_op)); + __detail::dpl_policy(0), experimental::dr::ranges::segments(r)[0].begin(), + experimental::dr::ranges::segments(r)[0].end(), init, binary_op)); std::vector futures; - for (auto &&segment : dr::ranges::segments(r)) { - auto &&local_policy = __detail::dpl_policy(dr::ranges::rank(segment)); + for (auto &&segment : experimental::dr::ranges::segments(r)) { + auto &&local_policy = __detail::dpl_policy(experimental::dr::ranges::rank(segment)); auto dist = rng::distance(segment); if (dist <= 0) { @@ -95,13 +95,13 @@ T reduce(ExecutionPolicy &&policy, R &&r, T init, BinaryOp &&binary_op) { } } -template +template T reduce(ExecutionPolicy &&policy, R &&r, T init) { return reduce(std::forward(policy), std::forward(r), init, std::plus<>()); } -template +template rng::range_value_t reduce(ExecutionPolicy &&policy, R &&r) { return reduce(std::forward(policy), std::forward(r), rng::range_value_t{}, std::plus<>()); @@ -109,7 +109,7 @@ rng::range_value_t reduce(ExecutionPolicy &&policy, R &&r) { // Iterator versions -template +template std::iter_value_t reduce(ExecutionPolicy &&policy, Iter first, Iter last) { return reduce(std::forward(policy), @@ -117,13 +117,13 @@ std::iter_value_t reduce(ExecutionPolicy &&policy, Iter first, std::plus<>()); } -template +template T reduce(ExecutionPolicy &&policy, Iter first, Iter last, T init) { return reduce(std::forward(policy), rng::subrange(first, last), init, std::plus<>()); } -template T reduce(ExecutionPolicy &&policy, Iter first, Iter last, T init, BinaryOp &&binary_op) { @@ -134,34 +134,34 @@ T reduce(ExecutionPolicy &&policy, Iter first, Iter last, T init, // Execution policy-less algorithms -template rng::range_value_t reduce(R &&r) { - return reduce(dr::shp::par_unseq, std::forward(r)); +template rng::range_value_t reduce(R &&r) { + return reduce(experimental::dr::shp::par_unseq, std::forward(r)); } -template T reduce(R &&r, T init) { - return reduce(dr::shp::par_unseq, std::forward(r), init); +template T reduce(R &&r, T init) { + return reduce(experimental::dr::shp::par_unseq, std::forward(r), init); } -template +template T reduce(R &&r, T init, BinaryOp &&binary_op) { - return reduce(dr::shp::par_unseq, std::forward(r), init, + return reduce(experimental::dr::shp::par_unseq, std::forward(r), init, std::forward(binary_op)); } -template +template std::iter_value_t reduce(Iter first, Iter last) { - return reduce(dr::shp::par_unseq, first, last); + return reduce(experimental::dr::shp::par_unseq, first, last); } -template +template T reduce(Iter first, Iter last, T init) { - return reduce(dr::shp::par_unseq, first, last, init); + return reduce(experimental::dr::shp::par_unseq, first, last, init); } -template +template T reduce(Iter first, Iter last, T init, BinaryOp &&binary_op) { - return reduce(dr::shp::par_unseq, first, last, init, + return reduce(experimental::dr::shp::par_unseq, first, last, init, std::forward(binary_op)); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp index b0a595cc62c..0790536ab92 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/sort.hpp @@ -14,7 +14,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { @@ -22,8 +22,8 @@ template sycl::event sort_async(LocalPolicy &&policy, InputIt first, InputIt last, Compare &&comp) { if (rng::distance(first, last) >= 2) { - dr::__detail::direct_iterator d_first(first); - dr::__detail::direct_iterator d_last(last); + experimental::dr::__detail::direct_iterator d_first(first); + experimental::dr::__detail::direct_iterator d_last(last); return oneapi::dpl::experimental::sort_async( std::forward(policy), d_first, d_last, std::forward(comp)); @@ -37,13 +37,13 @@ template (policy), d_start, d_end, d_value_first, d_value_last, d_result, @@ -53,17 +53,17 @@ OutputIt lower_bound(LocalPolicy &&policy, InputIt1 start, InputIt1 end, } // namespace __detail -template > +template > void sort(R &&r, Compare comp = Compare()) { - auto &&segments = dr::ranges::segments(r); + auto &&segments = experimental::dr::ranges::segments(r); if (rng::size(segments) == 0) { return; } else if (rng::size(segments) == 1) { auto &&segment = *rng::begin(segments); auto &&local_policy = - dr::shp::__detail::dpl_policy(dr::ranges::rank(segment)); - auto &&local_segment = dr::shp::__detail::local(segment); + experimental::dr::shp::__detail::dpl_policy(experimental::dr::ranges::rank(segment)); + auto &&local_segment = experimental::dr::shp::__detail::local(segment); __detail::sort_async(local_policy, rng::begin(local_segment), rng::end(local_segment), comp) @@ -86,11 +86,11 @@ void sort(R &&r, Compare comp = Compare()) { std::size_t segment_id = 0; for (auto &&segment : segments) { - auto &&q = dr::shp::__detail::queue(dr::ranges::rank(segment)); + auto &&q = experimental::dr::shp::__detail::queue(experimental::dr::ranges::rank(segment)); auto &&local_policy = - dr::shp::__detail::dpl_policy(dr::ranges::rank(segment)); + experimental::dr::shp::__detail::dpl_policy(experimental::dr::ranges::rank(segment)); - auto &&local_segment = dr::shp::__detail::local(segment); + auto &&local_segment = experimental::dr::shp::__detail::local(segment); auto s = __detail::sort_async(local_policy, rng::begin(local_segment), rng::end(local_segment), comp); @@ -112,12 +112,12 @@ void sort(R &&r, Compare comp = Compare()) { ++segment_id; } - dr::shp::__detail::wait(events); + experimental::dr::shp::__detail::wait(events); events.clear(); // Compute global medians by sorting medians and // computing `n_splitters` medians from the medians. - auto &&local_policy = dr::shp::__detail::dpl_policy(0); + auto &&local_policy = experimental::dr::shp::__detail::dpl_policy(0); __detail::sort_async(local_policy, medians, medians + n_segments * n_splitters, comp) .wait(); @@ -127,7 +127,7 @@ void sort(R &&r, Compare comp = Compare()) { // - Collect median of medians to get final splitters. // - Write splitters to [0, n_splitters) in `medians` - auto &&q = dr::shp::__detail::queue(0); + auto &&q = experimental::dr::shp::__detail::queue(0); q.single_task([=] { for (std::size_t i = 0; i < n_splitters; i++) { medians[i] = medians[std::size_t(step_size * (i + 1) + 0.5)]; @@ -144,11 +144,11 @@ void sort(R &&r, Compare comp = Compare()) { segment_id = 0; for (auto &&segment : segments) { - auto &&q = dr::shp::__detail::queue(dr::ranges::rank(segment)); + auto &&q = experimental::dr::shp::__detail::queue(experimental::dr::ranges::rank(segment)); auto &&local_policy = - dr::shp::__detail::dpl_policy(dr::ranges::rank(segment)); + experimental::dr::shp::__detail::dpl_policy(experimental::dr::ranges::rank(segment)); - auto &&local_segment = dr::shp::__detail::local(segment); + auto &&local_segment = experimental::dr::shp::__detail::local(segment); std::size_t *splitter_i = sycl::malloc_shared( n_splitters, q.get_device(), shp::context()); @@ -194,7 +194,7 @@ void sort(R &&r, Compare comp = Compare()) { segment_id = 0; for (auto &&segment : segments) { - auto &&q = dr::shp::__detail::queue(dr::ranges::rank(segment)); + auto &&q = experimental::dr::shp::__detail::queue(experimental::dr::ranges::rank(segment)); T *buffer = sycl::malloc_device(sorted_seg_sizes[segment_id], q); sorted_segments.push_back(buffer); @@ -205,7 +205,7 @@ void sort(R &&r, Compare comp = Compare()) { // Copy corresponding elements to each "sorted segment" segment_id = 0; for (auto &&segment : segments) { - auto &&local_segment = dr::shp::__detail::local(segment); + auto &&local_segment = experimental::dr::shp::__detail::local(segment); std::size_t *splitter_i = splitter_indices[segment_id]; @@ -232,13 +232,13 @@ void sort(R &&r, Compare comp = Compare()) { ++segment_id; } - dr::shp::__detail::wait(events); + experimental::dr::shp::__detail::wait(events); events.clear(); // Sort each of these new segments for (std::size_t i = 0; i < sorted_segments.size(); i++) { auto &&local_policy = - dr::shp::__detail::dpl_policy(dr::ranges::rank(segments[i])); + experimental::dr::shp::__detail::dpl_policy(experimental::dr::ranges::rank(segments[i])); T *seg = sorted_segments[i]; std::size_t n_elements = sorted_seg_sizes[i]; @@ -247,7 +247,7 @@ void sort(R &&r, Compare comp = Compare()) { events.push_back(e); } - dr::shp::__detail::wait(events); + experimental::dr::shp::__detail::wait(events); events.clear(); // Copy the results into the output. @@ -265,7 +265,7 @@ void sort(R &&r, Compare comp = Compare()) { rng::advance(d_first, n_elements); } - dr::shp::__detail::wait(events); + experimental::dr::shp::__detail::wait(events); // Free temporary memory. @@ -280,9 +280,9 @@ void sort(R &&r, Compare comp = Compare()) { sycl::free(medians, shp::context()); } -template > +template > void sort(RandomIt first, RandomIt last, Compare comp = Compare()) { sort(rng::subrange(first, last), comp); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp index 50eebd0698e..104c0e761a1 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp @@ -7,12 +7,12 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { /** * Applies the given function to a range and stores the result in another range, * beginning at out. - * \param policy use `dr::shp::par_unseq` here only + * \param policy use `experimental::dr::shp::par_unseq` here only * \param in the range of elements to transform * \param out the beginning of the destination range, may be equal to the * beginning of `in` range \param fn operation to apply to input elements @@ -23,8 +23,8 @@ namespace dr::shp { */ template -auto transform(ExecutionPolicy &&policy, dr::distributed_range auto &&in, - dr::distributed_iterator auto out, auto &&fn) { +auto transform(ExecutionPolicy &&policy, experimental::dr::distributed_range auto &&in, + experimental::dr::distributed_iterator auto out, auto &&fn) { static_assert( // currently only one policy supported std::is_same_v, device_policy>); @@ -37,7 +37,7 @@ auto transform(ExecutionPolicy &&policy, dr::distributed_range auto &&in, for (auto &&[in_seg, out_seg] : views::zip(in, rng::subrange(out, out_end)).zipped_segments()) { auto in_device = policy.get_devices()[in_seg.rank()]; - auto &&q = __detail::queue(dr::ranges::rank(in_seg)); + auto &&q = __detail::queue(experimental::dr::ranges::rank(in_seg)); const std::size_t seg_size = rng::size(in_seg); assert(seg_size == rng::size(out_seg)); auto local_in_seg = __detail::local(in_seg); @@ -49,7 +49,7 @@ auto transform(ExecutionPolicy &&policy, dr::distributed_range auto &&in, })); } else { OutT *buffer = - sycl::malloc_device(seg_size, in_device, dr::shp::context()); + sycl::malloc_device(seg_size, in_device, experimental::dr::shp::context()); buffers.push_back(buffer); sycl::event compute_event = q.parallel_for( @@ -61,20 +61,20 @@ auto transform(ExecutionPolicy &&policy, dr::distributed_range auto &&in, __detail::wait(events); for (auto *b : buffers) - sycl::free(b, dr::shp::context()); + sycl::free(b, experimental::dr::shp::context()); return rng::unary_transform_result{ rng::end(in), out_end}; } -template +template auto transform(R &&in, Iter out, Fn &&fn) { - return transform(dr::shp::par_unseq, std::forward(in), + return transform(experimental::dr::shp::par_unseq, std::forward(in), std::forward(out), std::forward(fn)); } -template +template auto transform(ExecutionPolicy &&policy, Iter1 in_begin, Iter1 in_end, Iter2 out_end, Fn &&fn) { return transform( @@ -83,12 +83,12 @@ auto transform(ExecutionPolicy &&policy, Iter1 in_begin, Iter1 in_end, std::forward(out_end), std::forward(fn)); } -template auto transform(Iter1 in_begin, Iter1 in_end, Iter2 out_end, Fn &&fn) { - return transform(dr::shp::par_unseq, std::forward(in_begin), + return transform(experimental::dr::shp::par_unseq, std::forward(in_begin), std::forward(in_end), std::forward(out_end), std::forward(fn)); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp index 10beee77ca9..b54539b7955 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp @@ -10,7 +10,7 @@ #include -namespace dr::shp { +namespace experimental::dr::shp { template using shared_allocator = sycl::usm_allocator; @@ -124,4 +124,4 @@ template class buffered_allocator { std::shared_ptr> buffers_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp index e0cf3175241..88829221b69 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp @@ -6,7 +6,7 @@ #include -namespace dr::shp { +namespace experimental::dr::shp { namespace detail { @@ -25,4 +25,4 @@ inline std::tuple factor(std::size_t n) { } // namespace detail -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp index c70f5aff017..d77caded1ff 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template class distributed_dense_matrix_accessor { public: @@ -25,9 +25,9 @@ template class distributed_dense_matrix_accessor { using scalar_value_type = rng::range_value_t; using scalar_reference = rng::range_reference_t; - using value_type = dr::shp::matrix_entry; + using value_type = experimental::dr::shp::matrix_entry; - using reference = dr::shp::matrix_ref; + using reference = experimental::dr::shp::matrix_ref; using iterator_category = std::random_access_iterator_tag; @@ -37,7 +37,7 @@ template class distributed_dense_matrix_accessor { using tile_type = L; - using key_type = dr::index<>; + using key_type = experimental::dr::index<>; constexpr distributed_dense_matrix_accessor() noexcept = default; constexpr ~distributed_dense_matrix_accessor() noexcept = default; @@ -131,30 +131,30 @@ template class distributed_dense_matrix_accessor { template using distributed_dense_matrix_iterator = - dr::iterator_adaptor>; + experimental::dr::iterator_adaptor>; template class distributed_dense_matrix { public: using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using value_type = dr::shp::matrix_entry; + using value_type = experimental::dr::shp::matrix_entry; using scalar_reference = rng::range_reference_t< - dr::shp::device_vector>>; + experimental::dr::shp::device_vector>>; using const_scalar_reference = rng::range_reference_t< - const dr::shp::device_vector>>; + const experimental::dr::shp::device_vector>>; - using reference = dr::shp::matrix_ref; - using const_reference = dr::shp::matrix_ref; + using reference = experimental::dr::shp::matrix_ref; + using const_reference = experimental::dr::shp::matrix_ref; - using key_type = dr::index<>; + using key_type = experimental::dr::index<>; using iterator = distributed_dense_matrix_iterator< - T, dr::shp::device_vector>>; + T, experimental::dr::shp::device_vector>>; distributed_dense_matrix(key_type shape) - : shape_(shape), partition_(new dr::shp::block_cyclic()) { + : shape_(shape), partition_(new experimental::dr::shp::block_cyclic()) { init_(); } @@ -211,17 +211,17 @@ template class distributed_dense_matrix { std::size_t tn = std::min(tile_shape()[1], shape()[1] - j * tile_shape()[1]); - return dense_matrix_view>>>( + return dense_matrix_view>>>( iter, key_type{tm, tn}, tile_shape()[1], tiles_[i * grid_shape()[1] + j].rank()); } - std::vector>>>> + std::vector>>>> tiles() { - std::vector>>>> + std::vector>>>> views_; for (std::size_t i = 0; i < grid_shape_[0]; i++) { @@ -268,8 +268,8 @@ template class distributed_dense_matrix { } auto segments() { - std::vector>>>> + std::vector>>>> views_; for (std::size_t i = 0; i < grid_shape_[0]; i++) { @@ -289,7 +289,7 @@ template class distributed_dense_matrix { tiles_[i * grid_shape_[1] + j].rank()); } } - return dr::__detail::owning_view(std::move(views_)); + return experimental::dr::__detail::owning_view(std::move(views_)); } private: @@ -303,8 +303,8 @@ template class distributed_dense_matrix { for (std::size_t j = 0; j < grid_shape_[1]; j++) { std::size_t rank = partition_->tile_rank(shape(), {i, j}); - auto device = dr::shp::devices()[rank]; - dr::shp::device_allocator alloc(dr::shp::context(), device); + auto device = experimental::dr::shp::devices()[rank]; + experimental::dr::shp::device_allocator alloc(experimental::dr::shp::context(), device); std::size_t tile_size = tile_shape_[0] * tile_shape_[1]; @@ -324,9 +324,9 @@ template class distributed_dense_matrix { key_type shape_; key_type grid_shape_; key_type tile_shape_; - std::unique_ptr partition_; + std::unique_ptr partition_; - std::vector>> tiles_; + std::vector>> tiles_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp index 904458e5777..3cf785e0c76 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp @@ -7,12 +7,12 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { -template > +template > class duplicated_vector { public: - using segment_type = dr::shp::device_vector; + using segment_type = experimental::dr::shp::device_vector; using value_type = T; using size_type = std::size_t; @@ -25,9 +25,9 @@ class duplicated_vector { capacity_ = count; std::size_t rank = 0; - for (auto &&device : dr::shp::devices()) { + for (auto &&device : experimental::dr::shp::devices()) { segments_.emplace_back( - segment_type(size(), Allocator(dr::shp::context(), device), rank++)); + segment_type(size(), Allocator(experimental::dr::shp::context(), device), rank++)); } } @@ -45,4 +45,4 @@ class duplicated_vector { std::size_t size_ = 0; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp index df29dda07df..538dd09c172 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp @@ -10,21 +10,21 @@ #include -namespace dr::shp { +namespace experimental::dr::shp { template class matrix_entry { public: using index_type = I; using map_type = T; - matrix_entry(dr::index index, const map_type &value) + matrix_entry(experimental::dr::index index, const map_type &value) : index_(index), value_(value) {} - matrix_entry(dr::index index, map_type &&value) + matrix_entry(experimental::dr::index index, map_type &&value) : index_(index), value_(std::move(value)) {} template requires(std::is_constructible_v) - matrix_entry(dr::index index, U &&value) + matrix_entry(experimental::dr::index index, U &&value) : index_(index), value_(std::forward(value)) {} template @@ -44,7 +44,7 @@ template class matrix_entry { return {{index_[0], index_[1]}, value_}; } - dr::index index() const noexcept { return index_; } + experimental::dr::index index() const noexcept { return index_; } map_type value() const noexcept { return value_; } @@ -81,33 +81,33 @@ template class matrix_entry { matrix_entry &operator=(matrix_entry &&) = default; private: - dr::index index_; + experimental::dr::index index_; map_type value_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp namespace std { template requires(!std::is_const_v) -void swap(dr::shp::matrix_entry a, dr::shp::matrix_entry b) { - dr::shp::matrix_entry other = a; +void swap(experimental::dr::shp::matrix_entry a, experimental::dr::shp::matrix_entry b) { + experimental::dr::shp::matrix_entry other = a; a = b; b = other; } template -struct tuple_element> - : tuple_element, T>> {}; +struct tuple_element> + : tuple_element, T>> {}; template -struct tuple_size> : integral_constant { +struct tuple_size> : integral_constant { }; } // namespace std -namespace dr::shp { +namespace experimental::dr::shp { template class matrix_ref { @@ -115,14 +115,14 @@ class matrix_ref { using scalar_type = T; using index_type = I; - using key_type = dr::index; + using key_type = experimental::dr::index; using map_type = T; using scalar_reference = TRef; - using value_type = dr::shp::matrix_entry; + using value_type = experimental::dr::shp::matrix_entry; - matrix_ref(dr::index index, scalar_reference value) + matrix_ref(experimental::dr::index index, scalar_reference value) : index_(index), value_(value) {} operator value_type() const noexcept { return value_type(index_, value_); } @@ -143,7 +143,7 @@ class matrix_ref { } } - dr::index index() const noexcept { return index_; } + experimental::dr::index index() const noexcept { return index_; } scalar_reference value() const noexcept { return value_; } @@ -180,33 +180,33 @@ class matrix_ref { matrix_ref &operator=(matrix_ref &&) = default; private: - dr::index index_; + experimental::dr::index index_; scalar_reference value_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp namespace std { template requires(!std::is_const_v) -void swap(dr::shp::matrix_ref a, - dr::shp::matrix_ref b) { - dr::shp::matrix_entry other = a; +void swap(experimental::dr::shp::matrix_ref a, + experimental::dr::shp::matrix_ref b) { + experimental::dr::shp::matrix_entry other = a; a = b; b = other; } template -struct tuple_element> - : tuple_element, TRef>> {}; +struct tuple_element> + : tuple_element, TRef>> {}; template -struct tuple_size> +struct tuple_size> : integral_constant {}; template -inline decltype(auto) get(dr::shp::matrix_ref ref) +inline decltype(auto) get(experimental::dr::shp::matrix_ref ref) requires(Index <= 1) { if constexpr (Index == 0) { @@ -218,7 +218,7 @@ inline decltype(auto) get(dr::shp::matrix_ref ref) } template -inline decltype(auto) get(dr::shp::matrix_entry entry) +inline decltype(auto) get(experimental::dr::shp::matrix_entry entry) requires(Index <= 1) { if constexpr (Index == 0) { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp index 5574450ffc5..fa38ce43b1c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { namespace tile { @@ -22,10 +22,10 @@ inline constexpr std::size_t div = std::numeric_limits::max(); class matrix_partition { public: - virtual std::size_t tile_rank(dr::index<> matrix_shape, - dr::index<> tile_id) const = 0; - virtual dr::index<> grid_shape(dr::index<> matrix_shape) const = 0; - virtual dr::index<> tile_shape(dr::index<> matrix_shape) const = 0; + virtual std::size_t tile_rank(experimental::dr::index<> matrix_shape, + experimental::dr::index<> tile_id) const = 0; + virtual experimental::dr::index<> grid_shape(experimental::dr::index<> matrix_shape) const = 0; + virtual experimental::dr::index<> tile_shape(experimental::dr::index<> matrix_shape) const = 0; virtual std::unique_ptr clone() const = 0; virtual ~matrix_partition(){}; @@ -33,17 +33,17 @@ class matrix_partition { class block_cyclic final : public matrix_partition { public: - block_cyclic(dr::index<> tile_shape = {dr::shp::tile::div, - dr::shp::tile::div}, - dr::index<> grid_shape = detail::factor(dr::shp::nprocs())) + block_cyclic(experimental::dr::index<> tile_shape = {experimental::dr::shp::tile::div, + experimental::dr::shp::tile::div}, + experimental::dr::index<> grid_shape = detail::factor(experimental::dr::shp::nprocs())) : tile_shape_(tile_shape), grid_shape_(grid_shape) {} block_cyclic(const block_cyclic &) noexcept = default; - dr::index<> tile_shape() const { return tile_shape_; } + experimental::dr::index<> tile_shape() const { return tile_shape_; } - std::size_t tile_rank(dr::index<> matrix_shape, dr::index<> tile_id) const { - dr::index<> pgrid_idx = {tile_id[0] % grid_shape_[0], + std::size_t tile_rank(experimental::dr::index<> matrix_shape, experimental::dr::index<> tile_id) const { + experimental::dr::index<> pgrid_idx = {tile_id[0] % grid_shape_[0], tile_id[1] % grid_shape_[1]}; auto pgrid = processor_grid_(); @@ -51,19 +51,19 @@ class block_cyclic final : public matrix_partition { return pgrid[pgrid_idx[0] * grid_shape_[1] + pgrid_idx[1]]; } - dr::index<> grid_shape(dr::index<> matrix_shape) const { + experimental::dr::index<> grid_shape(experimental::dr::index<> matrix_shape) const { auto ts = this->tile_shape(matrix_shape); - return dr::index<>((matrix_shape[0] + ts[0] - 1) / ts[0], + return experimental::dr::index<>((matrix_shape[0] + ts[0] - 1) / ts[0], (matrix_shape[1] + ts[1] - 1) / ts[1]); } - dr::index<> tile_shape(dr::index<> matrix_shape) const { + experimental::dr::index<> tile_shape(experimental::dr::index<> matrix_shape) const { std::array tshape = {tile_shape_[0], tile_shape_[1]}; constexpr std::size_t ndims = 2; for (std::size_t i = 0; i < ndims; i++) { - if (tshape[i] == dr::shp::tile::div) { + if (tshape[i] == experimental::dr::shp::tile::div) { tshape[i] = (matrix_shape[i] + grid_shape_[i] - 1) / grid_shape_[i]; } } @@ -85,15 +85,15 @@ class block_cyclic final : public matrix_partition { return grid; } - dr::index<> tile_shape_; - dr::index<> grid_shape_; -}; // namespace dr::shp + experimental::dr::index<> tile_shape_; + experimental::dr::index<> grid_shape_; +}; // namespace experimental::dr::shp inline std::vector partition_matmul(std::size_t m, std::size_t n, std::size_t k) { - dr::index<> c_pgrid = detail::factor(shp::nprocs()); + experimental::dr::index<> c_pgrid = detail::factor(shp::nprocs()); - block_cyclic c_block({dr::shp::tile::div, dr::shp::tile::div}, + block_cyclic c_block({experimental::dr::shp::tile::div, experimental::dr::shp::tile::div}, {c_pgrid[0], c_pgrid[1]}); std::size_t k_block; @@ -104,12 +104,12 @@ inline std::vector partition_matmul(std::size_t m, std::size_t n, k_block = (shp::nprocs() + c_pgrid[1] - 1) / c_pgrid[1]; } - block_cyclic a_block({dr::shp::tile::div, dr::shp::tile::div}, + block_cyclic a_block({experimental::dr::shp::tile::div, experimental::dr::shp::tile::div}, {c_pgrid[0], k_block}); - block_cyclic b_block({dr::shp::tile::div, dr::shp::tile::div}, + block_cyclic b_block({experimental::dr::shp::tile::div, experimental::dr::shp::tile::div}, {k_block, c_pgrid[1]}); return {a_block, b_block, c_block}; } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp index 92b25a5e34e..1a08e3ecbd3 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp @@ -13,7 +13,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template > class dense_matrix { @@ -26,9 +26,9 @@ class dense_matrix { using scalar_pointer = typename std::allocator_traits::pointer; using scalar_reference = std::iter_reference_t; - using reference = dr::shp::matrix_ref; + using reference = experimental::dr::shp::matrix_ref; - using key_type = dr::index<>; + using key_type = experimental::dr::index<>; using map_type = T; using iterator = dense_matrix_iterator; @@ -140,4 +140,4 @@ class dense_matrix { size_type ld_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp index 909009139e4..a96f4555256 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template requires(rng::viewable_range) @@ -121,42 +121,42 @@ class distributed_range_accessor { template using distributed_sparse_matrix_iterator = - dr::iterator_adaptor>; + experimental::dr::iterator_adaptor>; template class sparse_matrix { public: using size_type = std::size_t; using difference_type = std::ptrdiff_t; - using value_type = dr::shp::matrix_entry; + using value_type = experimental::dr::shp::matrix_entry; using scalar_reference = rng::range_reference_t< - dr::shp::device_vector>>; + experimental::dr::shp::device_vector>>; using const_scalar_reference = rng::range_reference_t< - const dr::shp::device_vector>>; + const experimental::dr::shp::device_vector>>; - using reference = dr::shp::matrix_ref; - using const_reference = dr::shp::matrix_ref; + using reference = experimental::dr::shp::matrix_ref; + using const_reference = experimental::dr::shp::matrix_ref; - using key_type = dr::index; + using key_type = experimental::dr::index; - using segment_type = dr::shp::csr_matrix_view< + using segment_type = experimental::dr::shp::csr_matrix_view< T, I, - rng::iterator_t>>, - rng::iterator_t>>>; + rng::iterator_t>>, + rng::iterator_t>>>; - // using iterator = sparse_matrix_iterator>>; + // using iterator = sparse_matrix_iterator>>; using iterator = distributed_sparse_matrix_iterator &&>; sparse_matrix(key_type shape) - : shape_(shape), partition_(new dr::shp::block_cyclic()) { + : shape_(shape), partition_(new experimental::dr::shp::block_cyclic()) { init_(); } sparse_matrix(key_type shape, double density) - : shape_(shape), partition_(new dr::shp::block_cyclic()) { + : shape_(shape), partition_(new experimental::dr::shp::block_cyclic()) { init_random_(density); } @@ -217,15 +217,15 @@ template class sparse_matrix { colind.resize(tile_view.size()); rowptr.resize(tile_view.shape()[0] + 1); - auto v_e = dr::shp::copy_async(tile_view.values_data(), + auto v_e = experimental::dr::shp::copy_async(tile_view.values_data(), tile_view.values_data() + values.size(), values.data()); - auto c_e = dr::shp::copy_async(tile_view.colind_data(), + auto c_e = experimental::dr::shp::copy_async(tile_view.colind_data(), tile_view.colind_data() + colind.size(), colind.data()); - auto r_e = dr::shp::copy_async(tile_view.rowptr_data(), + auto r_e = experimental::dr::shp::copy_async(tile_view.rowptr_data(), tile_view.rowptr_data() + rowptr.size(), rowptr.data()); @@ -320,9 +320,9 @@ template class sparse_matrix { for (std::size_t j = 0; j < grid_shape_[1]; j++) { std::size_t rank = partition_->tile_rank(shape(), {i, j}); - auto device = dr::shp::devices()[rank]; - dr::shp::device_allocator alloc(dr::shp::context(), device); - dr::shp::device_allocator i_alloc(dr::shp::context(), device); + auto device = experimental::dr::shp::devices()[rank]; + experimental::dr::shp::device_allocator alloc(experimental::dr::shp::context(), device); + experimental::dr::shp::device_allocator i_alloc(experimental::dr::shp::context(), device); values_.emplace_back(1, alloc, rank); rowptr_.emplace_back(2, i_alloc, rank); @@ -354,28 +354,28 @@ template class sparse_matrix { std::size_t tn = std::min(tile_shape_[1], shape()[1] - j * tile_shape_[1]); - auto device = dr::shp::devices()[rank]; - dr::shp::device_allocator alloc(dr::shp::context(), device); - dr::shp::device_allocator i_alloc(dr::shp::context(), device); + auto device = experimental::dr::shp::devices()[rank]; + experimental::dr::shp::device_allocator alloc(experimental::dr::shp::context(), device); + experimental::dr::shp::device_allocator i_alloc(experimental::dr::shp::context(), device); auto seed = i * grid_shape_[1] + j; auto csr = generate_random_csr(key_type(tm, tn), density, seed); std::size_t nnz = csr.size(); - dr::shp::device_vector> values( + experimental::dr::shp::device_vector> values( csr.size(), alloc, rank); - dr::shp::device_vector> rowptr( + experimental::dr::shp::device_vector> rowptr( tm + 1, i_alloc, rank); - dr::shp::device_vector> colind( + experimental::dr::shp::device_vector> colind( csr.size(), i_alloc, rank); - dr::shp::copy(csr.values_data(), csr.values_data() + csr.size(), + experimental::dr::shp::copy(csr.values_data(), csr.values_data() + csr.size(), values.data()); - dr::shp::copy(csr.rowptr_data(), csr.rowptr_data() + tm + 1, + experimental::dr::shp::copy(csr.rowptr_data(), csr.rowptr_data() + tm + 1, rowptr.data()); - dr::shp::copy(csr.colind_data(), csr.colind_data() + csr.size(), + experimental::dr::shp::copy(csr.colind_data(), csr.colind_data() + csr.size(), colind.data()); values_.push_back(std::move(values)); @@ -397,11 +397,11 @@ template class sparse_matrix { key_type shape_; key_type grid_shape_; key_type tile_shape_; - std::unique_ptr partition_; + std::unique_ptr partition_; - std::vector>> values_; - std::vector>> rowptr_; - std::vector>> colind_; + std::vector>> values_; + std::vector>> rowptr_; + std::vector>> colind_; std::vector nnz_; std::size_t total_nnz_ = 0; @@ -410,4 +410,4 @@ template class sparse_matrix { std::vector segments_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp index b4d76f98358..7ba49cb2546 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp @@ -11,11 +11,11 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { -inline constexpr auto local = dr::ranges::__detail::local; +inline constexpr auto local = experimental::dr::ranges::__detail::local; template concept is_syclmemcopyable = std::is_same_v, Dest> && @@ -87,4 +87,4 @@ inline void wait(const std::vector &events) { } // namespace __detail -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp index 71dd37a5ae6..a9d9a7a1dd0 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp @@ -10,7 +10,7 @@ #include -namespace dr::shp { +namespace experimental::dr::shp { template requires(std::is_trivially_copyable_v || std::is_void_v) @@ -141,4 +141,4 @@ class device_ptr { T *pointer_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp index 8b51ca2122e..5cbf13f9544 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template requires(std::is_trivially_copyable_v || std::is_void_v) @@ -24,7 +24,7 @@ class device_ref { #ifdef __SYCL_DEVICE_ONLY__ return *pointer_; #else - auto &&q = dr::shp::__detail::default_queue(); + auto &&q = experimental::dr::shp::__detail::default_queue(); char buffer[sizeof(T)] __attribute__((aligned(sizeof(T)))); q.memcpy(reinterpret_cast(buffer), pointer_, sizeof(T)).wait(); return *reinterpret_cast(buffer); @@ -37,7 +37,7 @@ class device_ref { #ifdef __SYCL_DEVICE_ONLY__ *pointer_ = value; #else - auto &&q = dr::shp::__detail::default_queue(); + auto &&q = experimental::dr::shp::__detail::default_queue(); q.memcpy(pointer_, &value, sizeof(T)).wait(); #endif return *this; @@ -57,4 +57,4 @@ class device_ref { T *pointer_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp index 2a779f900c2..89147476bee 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { // A `device_span` is simply a normal `std::span` that's // been decorated with an extra `rank()` function, showing @@ -41,7 +41,7 @@ class device_span : public std::span { */ template -class device_span : public dr::shp::span { +class device_span : public experimental::dr::shp::span { public: constexpr device_span() noexcept {} @@ -51,22 +51,22 @@ class device_span : public dr::shp::span { using reference = std::iter_reference_t; template - requires(dr::remote_range) + requires(experimental::dr::remote_range) device_span(R &&r) - : dr::shp::span(rng::begin(r), rng::size(r)), - rank_(dr::ranges::rank(r)) {} + : experimental::dr::shp::span(rng::begin(r), rng::size(r)), + rank_(experimental::dr::ranges::rank(r)) {} template device_span(R &&r, std::size_t rank) - : dr::shp::span(rng::begin(r), rng::size(r)), rank_(rank) {} + : experimental::dr::shp::span(rng::begin(r), rng::size(r)), rank_(rank) {} template constexpr device_span(It first, std::size_t count, std::size_t rank) - : dr::shp::span(first, count), rank_(rank) {} + : experimental::dr::shp::span(first, count), rank_(rank) {} template constexpr device_span(It first, End last, std::size_t rank) - : dr::shp::span(first, last), rank_(rank) {} + : experimental::dr::shp::span(first, last), rank_(rank) {} constexpr std::size_t rank() const noexcept { return rank_; } @@ -94,4 +94,4 @@ template device_span(R &&, std::size_t) -> device_span, rng::iterator_t>; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp index 0d6c97a5dcf..b9f1ea4ff42 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp @@ -7,14 +7,14 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template -class device_vector : public dr::shp::vector { +class device_vector : public experimental::dr::shp::vector { public: constexpr device_vector() noexcept {} - using base = dr::shp::vector; + using base = experimental::dr::shp::vector; using value_type = T; using size_type = std::size_t; @@ -34,4 +34,4 @@ template device_vector(std::size_t, const Alloc, std::size_t) -> device_vector; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp index 265053504b7..59992298f6f 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp @@ -12,7 +12,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template class distributed_span_accessor { public: @@ -102,7 +102,7 @@ template class distributed_span_accessor { } auto segments() const noexcept { - return dr::__detail::drop_segments(segments_, segment_id_, idx_); + return experimental::dr::__detail::drop_segments(segments_, segment_id_, idx_); } private: @@ -121,7 +121,7 @@ template class distributed_span_accessor { template using distributed_span_iterator = - dr::iterator_adaptor>; + experimental::dr::iterator_adaptor>; template class distributed_span : public rng::view_interface> { @@ -129,7 +129,7 @@ class distributed_span : public rng::view_interface> { using element_type = T; using value_type = std::remove_cv_t; - using segment_type = dr::shp::device_span; + using segment_type = experimental::dr::shp::device_span; using size_type = rng::range_size_t; using difference_type = rng::range_difference_t; @@ -154,21 +154,21 @@ class distributed_span : public rng::view_interface> { operator=(const distributed_span &) noexcept = default; template - requires(dr::remote_range>) + requires(experimental::dr::remote_range>) constexpr distributed_span(R &&segments) { for (auto &&segment : segments) { std::size_t size = rng::size(segment); segments_.push_back( - segment_type(rng::begin(segment), size, dr::ranges::rank(segment))); + segment_type(rng::begin(segment), size, experimental::dr::ranges::rank(segment))); size_ += size; } } - template constexpr distributed_span(R &&r) { - for (auto &&segment : dr::ranges::segments(std::forward(r))) { + template constexpr distributed_span(R &&r) { + for (auto &&segment : experimental::dr::ranges::segments(std::forward(r))) { std::size_t size = rng::size(segment); segments_.push_back( - segment_type(rng::begin(segment), size, dr::ranges::rank(segment))); + segment_type(rng::begin(segment), size, experimental::dr::ranges::rank(segment))); size_ += size; } } @@ -249,9 +249,9 @@ distributed_span(R &&segments) -> distributed_span>, rng::iterator_t>>; -template +template distributed_span(R &&r) -> distributed_span< rng::range_value_t, - rng::iterator_t>>; + rng::iterator_t>>; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp index a459a8df072..1175f7ce6d5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp @@ -14,7 +14,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template class distributed_vector_accessor { public: @@ -92,7 +92,7 @@ template class distributed_vector_accessor { } auto segments() const noexcept { - return dr::__detail::drop_segments(segments_, segment_id_, idx_); + return experimental::dr::__detail::drop_segments(segments_, segment_id_, idx_); } private: @@ -108,17 +108,17 @@ template class distributed_vector_accessor { template using distributed_vector_iterator = - dr::iterator_adaptor>; + experimental::dr::iterator_adaptor>; // TODO: support teams, distributions /// distributed vector -template > +template > struct distributed_vector { public: - using segment_type = dr::shp::device_vector; + using segment_type = experimental::dr::shp::device_vector; using const_segment_type = - std::add_const_t>; + std::add_const_t>; using value_type = T; using size_type = std::size_t; @@ -137,27 +137,27 @@ struct distributed_vector { using allocator_type = Allocator; distributed_vector(std::size_t count = 0) { - assert(dr::shp::devices().size() > 0); + assert(experimental::dr::shp::devices().size() > 0); size_ = count; segment_size_ = - (count + dr::shp::devices().size() - 1) / dr::shp::devices().size(); - capacity_ = segment_size_ * dr::shp::devices().size(); + (count + experimental::dr::shp::devices().size() - 1) / experimental::dr::shp::devices().size(); + capacity_ = segment_size_ * experimental::dr::shp::devices().size(); std::size_t rank = 0; - for (auto &&device : dr::shp::devices()) { + for (auto &&device : experimental::dr::shp::devices()) { segments_.emplace_back(segment_type( - segment_size_, Allocator(dr::shp::context(), device), rank++)); + segment_size_, Allocator(experimental::dr::shp::context(), device), rank++)); } } distributed_vector(std::size_t count, const T &value) : distributed_vector(count) { - dr::shp::fill(*this, value); + experimental::dr::shp::fill(*this, value); } distributed_vector(std::initializer_list init) : distributed_vector(init.size()) { - dr::shp::copy(rng::begin(init), rng::end(init), begin()); + experimental::dr::shp::copy(rng::begin(init), rng::end(init), begin()); } reference operator[](size_type pos) { @@ -174,10 +174,10 @@ struct distributed_vector { size_type size() const noexcept { return size_; } - auto segments() { return dr::__detail::take_segments(segments_, size()); } + auto segments() { return experimental::dr::__detail::take_segments(segments_, size()); } auto segments() const { - return dr::__detail::take_segments(segments_, size()); + return experimental::dr::__detail::take_segments(segments_, size()); } iterator begin() { return iterator(segments_, 0, 0, segment_size_); } @@ -201,7 +201,7 @@ struct distributed_vector { void resize(size_type count, const value_type &value) { distributed_vector other(count, value); std::size_t copy_size = std::min(other.size(), size()); - dr::shp::copy(begin(), begin() + copy_size, other.begin()); + experimental::dr::shp::copy(begin(), begin() + copy_size, other.begin()); *this = std::move(other); } @@ -214,4 +214,4 @@ struct distributed_vector { std::size_t segment_size_ = 0; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp index 185dcff1016..7678e479f07 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp @@ -9,7 +9,7 @@ #include -namespace dr::shp { +namespace experimental::dr::shp { template class future { public: @@ -45,4 +45,4 @@ template class future { std::vector events_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp index b31eb2b3fa5..667d21f6ef0 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/init.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { @@ -103,4 +103,4 @@ inline auto &dpl_policy(std::size_t rank) { } // namespace __detail -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp index 36fb81dd3c5..14e536767b4 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template class id { public: @@ -92,7 +92,7 @@ class segment_range_accessor { size_type idx_ = 0; }; -using segment_range_iterator = dr::iterator_adaptor; +using segment_range_iterator = experimental::dr::iterator_adaptor; template class segment_range { public: @@ -131,8 +131,8 @@ template class segment_range { /* template auto distributed_iota_view(R &&r) { - static_assert(dr::distributed_contiguous_range); - if constexpr (dr::distributed_contiguous_range) { + static_assert(experimental::dr::distributed_contiguous_range); + if constexpr (experimental::dr::distributed_contiguous_range) { std::vector> iota_segments; std::size_t global_offset = 0; std::size_t segment_id = 0; @@ -142,11 +142,11 @@ template auto distributed_iota_view(R &&r) { global_offset += segment.size(); segment_id++; } - return dr::shp::distributed_span(iota_segments); + return experimental::dr::shp::distributed_span(iota_segments); } else { return segment_range(0, rng::size(r), 0); } } */ -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp index 4373e51bb5b..19864776395 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp @@ -7,11 +7,11 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template auto enumerate(R &&r) { auto i = rng::views::iota(uint32_t(0), uint32_t(rng::size(r))); - return dr::shp::zip_view(i, r); + return experimental::dr::shp::zip_view(i, r); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp index e297405d30c..4bc2c0ed4d3 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/span.hpp @@ -8,10 +8,10 @@ #include -namespace dr::shp { +namespace experimental::dr::shp { template -class span : public rng::view_interface> { +class span : public rng::view_interface> { public: static_assert(std::is_same_v, T>); @@ -58,4 +58,4 @@ span(R &&) -> span, rng::iterator_t>; template span(Iter first, std::size_t count) -> span, Iter>; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp index 479d3bf5207..ffbcb323df6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp @@ -7,7 +7,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template sycl::device select_device(Selector &&selector) { sycl::device d; @@ -199,22 +199,22 @@ template void print_range_details(R &&r, std::string label = "") { std::cout << "\"" << label << "\" "; } - std::cout << "distributed range with " << rng::size(dr::ranges::segments(r)) + std::cout << "distributed range with " << rng::size(experimental::dr::ranges::segments(r)) << " segments." << std::endl; std::size_t idx = 0; - for (auto &&segment : dr::ranges::segments(r)) { + for (auto &&segment : experimental::dr::ranges::segments(r)) { std::cout << "Seg " << idx++ << ", size " << segment.size() << " (rank " - << dr::ranges::rank(segment) << ")" << std::endl; + << experimental::dr::ranges::rank(segment) << ")" << std::endl; } } -template +template void range_details(R &&r, std::size_t width = 80) { std::size_t size = rng::size(r); for (auto &&[idx, segment] : - dr::__detail::enumerate(dr::ranges::segments(r))) { + experimental::dr::__detail::enumerate(experimental::dr::ranges::segments(r))) { std::size_t local_size = rng::size(segment); double percent = double(local_size) / size; @@ -228,7 +228,7 @@ void range_details(R &&r, std::size_t width = 80) { std::size_t after_whitespace = whitespace - initial_whitespace; std::cout << "[" << std::string(initial_whitespace, ' ') - << dr::ranges::rank(segment) << std::string(after_whitespace, ' ') + << experimental::dr::ranges::rank(segment) << std::string(after_whitespace, ' ') << "]"; } std::cout << std::endl; @@ -243,4 +243,4 @@ concept sycl_device_selector = requires(T &t, const sycl::device &device) { } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp index f95649a0135..24f0dc822f6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp @@ -8,14 +8,14 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { template > class coo_matrix { public: - using value_type = dr::shp::matrix_entry; + using value_type = experimental::dr::shp::matrix_entry; using scalar_type = T; using index_type = I; using size_type = std::size_t; @@ -23,7 +23,7 @@ class coo_matrix { using allocator_type = Allocator; - using key_type = dr::index; + using key_type = experimental::dr::index; using map_type = T; using backend_allocator_type = typename std::allocator_traits< @@ -33,14 +33,14 @@ class coo_matrix { using iterator = typename backend_type::iterator; using const_iterator = typename backend_type::const_iterator; - using reference = dr::shp::matrix_ref; - using const_reference = dr::shp::matrix_ref, I>; + using reference = experimental::dr::shp::matrix_ref; + using const_reference = experimental::dr::shp::matrix_ref, I>; using scalar_reference = T &; - coo_matrix(dr::index shape) : shape_(shape) {} + coo_matrix(experimental::dr::index shape) : shape_(shape) {} - dr::index shape() const noexcept { return shape_; } + experimental::dr::index shape() const noexcept { return shape_; } size_type size() const noexcept { return tuples_.size(); } @@ -123,7 +123,7 @@ class coo_matrix { }); } - void reshape(dr::index shape) { + void reshape(experimental::dr::index shape) { bool all_inside = true; for (auto &&[index, v] : *this) { auto &&[i, j] = index; @@ -161,10 +161,10 @@ class coo_matrix { } private: - dr::index shape_; + experimental::dr::index shape_; backend_type tuples_; }; } // namespace __detail -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp index f5ebc9ae9b5..e3ebeb099ed 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { namespace { @@ -27,7 +27,7 @@ using uniform_distribution_t = typename uniform_distribution::type; } // namespace template -auto generate_random_csr(dr::index shape, double density = 0.01, +auto generate_random_csr(experimental::dr::index shape, double density = 0.01, unsigned int seed = 0) { assert(density >= 0.0 && density < 1.0); @@ -89,4 +89,4 @@ auto generate_random_csr(dr::index shape, double density = 0.01, return csr_matrix_view(values, rowptr, colind, shape, nnz, 0); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp index ff6bf29c357..14c1e24c6e1 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp @@ -15,7 +15,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { @@ -24,7 +24,7 @@ namespace __detail { // 2) `tuples` has shape `shape` // 3) `tuples` has `nnz` elements template -auto convert_to_csr(Tuples &&tuples, dr::index<> shape, std::size_t nnz, +auto convert_to_csr(Tuples &&tuples, experimental::dr::index<> shape, std::size_t nnz, Allocator &&allocator) { auto &&[index, v] = *tuples.begin(); auto &&[i, j] = index; @@ -67,7 +67,7 @@ auto convert_to_csr(Tuples &&tuples, dr::index<> shape, std::size_t nnz, } return csr_matrix_view(values, rowptr, colind, - dr::index(shape[0], shape[1]), nnz, 0); + experimental::dr::index(shape[0], shape[1]), nnz, 0); } /// Read in the Matrix Market file at location `file_path` and a return @@ -209,7 +209,7 @@ inline coo_matrix mmread(std::string file_path, bool one_indexed = true) { } template -void destroy_csr_matrix_view(dr::shp::csr_matrix_view view, +void destroy_csr_matrix_view(experimental::dr::shp::csr_matrix_view view, Allocator &&alloc) { alloc.deallocate(view.values_data(), view.size()); typename std::allocator_traits::template rebind_alloc i_alloc( @@ -221,25 +221,25 @@ void destroy_csr_matrix_view(dr::shp::csr_matrix_view view, } // namespace __detail template -auto create_distributed(dr::shp::csr_matrix_view local_mat, +auto create_distributed(experimental::dr::shp::csr_matrix_view local_mat, const matrix_partition &partition) { - dr::shp::sparse_matrix a(local_mat.shape(), partition); + experimental::dr::shp::sparse_matrix a(local_mat.shape(), partition); - std::vector> views; + std::vector> views; std::vector events; views.reserve(a.grid_shape()[0] * a.grid_shape()[1]); for (I i = 0; i < a.grid_shape()[0]; i++) { for (I j = 0; j < a.grid_shape()[1]; j++) { auto &&tile = a.tile({i, j}); - dr::index row_bounds(i * a.tile_shape()[0], + experimental::dr::index row_bounds(i * a.tile_shape()[0], i * a.tile_shape()[0] + tile.shape()[0]); - dr::index column_bounds(j * a.tile_shape()[1], + experimental::dr::index column_bounds(j * a.tile_shape()[1], j * a.tile_shape()[1] + tile.shape()[1]); auto local_submat = local_mat.submatrix(row_bounds, column_bounds); - auto submatrix_shape = dr::index(row_bounds[1] - row_bounds[0], + auto submatrix_shape = experimental::dr::index(row_bounds[1] - row_bounds[0], column_bounds[1] - column_bounds[0]); auto copied_submat = __detail::convert_to_csr( @@ -281,9 +281,9 @@ template auto mmread(std::string file_path, bool one_indexed = true) { return mmread( file_path, - dr::shp::block_cyclic({dr::shp::tile::div, dr::shp::tile::div}, - {dr::shp::nprocs(), 1}), + experimental::dr::shp::block_cyclic({experimental::dr::shp::tile::div, experimental::dr::shp::tile::div}, + {experimental::dr::shp::nprocs(), 1}), one_indexed); } -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp index 7860e1f22ed..3a4b35cb7ae 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp @@ -6,7 +6,7 @@ #include -namespace dr::shp { +namespace experimental::dr::shp { // TODO: deal properly with non-trivially destructible types // - constructors, destructors, assign @@ -245,4 +245,4 @@ template > class vector { allocator_type allocator_; }; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp index fecf63954ef..7d8f1813cd2 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template class csr_matrix_view_accessor { @@ -21,9 +21,9 @@ class csr_matrix_view_accessor { using index_type = I; - using value_type = dr::shp::matrix_entry; + using value_type = experimental::dr::shp::matrix_entry; - using reference = dr::shp::matrix_ref; + using reference = experimental::dr::shp::matrix_ref; using iterator_category = std::random_access_iterator_tag; @@ -31,7 +31,7 @@ class csr_matrix_view_accessor { using const_iterator_accessor = iterator_accessor; using nonconst_iterator_accessor = iterator_accessor; - using key_type = dr::index; + using key_type = experimental::dr::index; constexpr csr_matrix_view_accessor() noexcept = default; constexpr ~csr_matrix_view_accessor() noexcept = default; @@ -119,7 +119,7 @@ class csr_matrix_view_accessor { template using csr_matrix_view_iterator = - dr::iterator_adaptor>; + experimental::dr::iterator_adaptor>; template class csr_matrix_view @@ -129,12 +129,12 @@ class csr_matrix_view using difference_type = std::ptrdiff_t; using scalar_reference = std::iter_reference_t; - using reference = dr::shp::matrix_ref; + using reference = experimental::dr::shp::matrix_ref; using scalar_type = T; using index_type = I; - using key_type = dr::index; + using key_type = experimental::dr::index; using map_type = T; using iterator = csr_matrix_view_iterator; @@ -222,4 +222,4 @@ csr_matrix_view(TIter, IIter, IIter, Args &&...) -> csr_matrix_view, std::iter_value_t, TIter, IIter>; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp index 71286bd4734..4543af1dd04 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp @@ -8,7 +8,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template class dense_matrix_column_accessor { public: using size_type = std::size_t; @@ -17,9 +17,9 @@ template class dense_matrix_column_accessor { using scalar_value_type = std::iter_value_t; using scalar_reference = std::iter_reference_t; - using value_type = dr::shp::matrix_entry; + using value_type = experimental::dr::shp::matrix_entry; - using reference = dr::shp::matrix_ref; + using reference = experimental::dr::shp::matrix_ref; using iterator_category = std::random_access_iterator_tag; @@ -27,7 +27,7 @@ template class dense_matrix_column_accessor { using const_iterator_accessor = iterator_accessor; using nonconst_iterator_accessor = iterator_accessor; - using key_type = dr::index<>; + using key_type = experimental::dr::index<>; constexpr dense_matrix_column_accessor() noexcept = default; constexpr ~dense_matrix_column_accessor() noexcept = default; @@ -72,7 +72,7 @@ template class dense_matrix_column_accessor { template using dense_matrix_column_iterator = - dr::iterator_adaptor>; + experimental::dr::iterator_adaptor>; template class dense_matrix_column_view { public: @@ -81,7 +81,7 @@ template class dense_matrix_column_view { using scalar_reference = std::iter_reference_t; - using key_type = dr::index<>; + using key_type = experimental::dr::index<>; using map_type = T; using iterator = dense_matrix_column_iterator; @@ -108,4 +108,4 @@ template dense_matrix_column_view(Iter, std::size_t, std::size_t, std::size_t) -> dense_matrix_column_view, Iter>; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp index fb2ff89b914..b2674679b20 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp @@ -12,7 +12,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template class dense_matrix_accessor { public: @@ -22,9 +22,9 @@ template class dense_matrix_accessor { using scalar_type = std::iter_value_t; using scalar_reference = std::iter_reference_t; - using value_type = dr::shp::matrix_entry; + using value_type = experimental::dr::shp::matrix_entry; - using reference = dr::shp::matrix_ref; + using reference = experimental::dr::shp::matrix_ref; using iterator_category = std::random_access_iterator_tag; @@ -32,7 +32,7 @@ template class dense_matrix_accessor { using const_iterator_accessor = iterator_accessor; using nonconst_iterator_accessor = iterator_accessor; - using key_type = dr::index<>; + using key_type = experimental::dr::index<>; constexpr dense_matrix_accessor() noexcept = default; constexpr ~dense_matrix_accessor() noexcept = default; @@ -101,9 +101,9 @@ template class dense_matrix_accessor { template using dense_matrix_iterator = - dr::iterator_adaptor>; + experimental::dr::iterator_adaptor>; template using dense_matrix_view_iterator = dense_matrix_iterator; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp index dfe28d46f68..4ad164708b7 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp @@ -14,7 +14,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template class dense_matrix_view @@ -24,9 +24,9 @@ class dense_matrix_view using difference_type = std::ptrdiff_t; using scalar_reference = std::iter_reference_t; - using reference = dr::shp::matrix_ref; + using reference = experimental::dr::shp::matrix_ref; - using key_type = dr::index<>; + using key_type = experimental::dr::index<>; using map_type = T; using iterator = dense_matrix_view_iterator; @@ -110,15 +110,15 @@ class dense_matrix_view }; template -dense_matrix_view(Iter, dr::index<>, std::size_t) +dense_matrix_view(Iter, experimental::dr::index<>, std::size_t) -> dense_matrix_view, Iter>; template -dense_matrix_view(Iter, dr::index<>) +dense_matrix_view(Iter, experimental::dr::index<>) -> dense_matrix_view, Iter>; template dense_matrix_view(dense_matrix &) -> dense_matrix_view::pointer>; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp index 18d8e1d82b0..d88e0b3682c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { template class dense_matrix_row_accessor { public: using size_type = std::size_t; @@ -18,9 +18,9 @@ template class dense_matrix_row_accessor { using scalar_value_type = std::iter_value_t; using scalar_reference = std::iter_reference_t; - using value_type = dr::shp::matrix_entry; + using value_type = experimental::dr::shp::matrix_entry; - using reference = dr::shp::matrix_ref; + using reference = experimental::dr::shp::matrix_ref; using iterator_category = std::random_access_iterator_tag; @@ -28,7 +28,7 @@ template class dense_matrix_row_accessor { using const_iterator_accessor = iterator_accessor; using nonconst_iterator_accessor = iterator_accessor; - using key_type = dr::index<>; + using key_type = experimental::dr::index<>; constexpr dense_matrix_row_accessor() noexcept = default; constexpr ~dense_matrix_row_accessor() noexcept = default; @@ -72,7 +72,7 @@ template class dense_matrix_row_accessor { template using dense_matrix_row_iterator = - dr::iterator_adaptor>; + experimental::dr::iterator_adaptor>; template class dense_matrix_row_view { public: @@ -81,7 +81,7 @@ template class dense_matrix_row_view { using scalar_reference = std::iter_reference_t; - using key_type = dr::index<>; + using key_type = experimental::dr::index<>; using map_type = T; using iterator = dense_matrix_row_iterator; @@ -106,4 +106,4 @@ template dense_matrix_row_view(Iter, std::size_t, std::size_t) -> dense_matrix_row_view, Iter>; -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp index c3455e9585a..2818d82cd4c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp @@ -6,7 +6,7 @@ #include -namespace dr::shp { +namespace experimental::dr::shp { namespace views { @@ -30,7 +30,7 @@ class enumerate_adapter_closure { requires(rng::sized_range) auto operator()(R &&r) const { using W = std::uint32_t; - return dr::shp::zip_view(rng::views::iota(W(0), W(rng::size(r))), + return experimental::dr::shp::zip_view(rng::views::iota(W(0), W(rng::size(r))), std::forward(r)); } @@ -53,4 +53,4 @@ inline constexpr auto enumerate = enumerate_fn_{}; } // namespace views -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp index aa5887c50cd..eb503a24961 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp @@ -11,19 +11,19 @@ #include #include -namespace dr::shp { +namespace experimental::dr::shp { namespace views { -template -auto slice(R &&r, dr::index<> slice_indices) { - return dr::shp::distributed_span(dr::ranges::segments(std::forward(r))) +template +auto slice(R &&r, experimental::dr::index<> slice_indices) { + return experimental::dr::shp::distributed_span(experimental::dr::ranges::segments(std::forward(r))) .subspan(slice_indices[0], slice_indices[1] - slice_indices[0]); } class slice_adaptor_closure { public: - slice_adaptor_closure(dr::index<> slice_indices) : idx_(slice_indices) {} + slice_adaptor_closure(experimental::dr::index<> slice_indices) : idx_(slice_indices) {} template auto operator()(R &&r) const { return slice(std::forward(r), idx_); @@ -35,13 +35,13 @@ class slice_adaptor_closure { } private: - dr::index<> idx_; + experimental::dr::index<> idx_; }; -inline auto slice(dr::index<> slice_indices) { +inline auto slice(experimental::dr::index<> slice_indices) { return slice_adaptor_closure(slice_indices); } } // namespace views -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp index 3f7e4449266..3b763c6a897 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp @@ -9,7 +9,7 @@ #include #include -namespace dr::shp::views { +namespace experimental::dr::shp::views { inline constexpr auto all = rng::views::all; @@ -17,10 +17,10 @@ inline constexpr auto counted = rng::views::counted; inline constexpr auto drop = rng::views::drop; -inline constexpr auto iota = dr::views::iota; +inline constexpr auto iota = experimental::dr::views::iota; inline constexpr auto take = rng::views::take; -inline constexpr auto transform = dr::views::transform; +inline constexpr auto transform = experimental::dr::views::transform; -} // namespace dr::shp::views +} // namespace experimental::dr::shp::views diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp index 81971f834bc..ff556646000 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp @@ -12,7 +12,7 @@ #include #include -namespace dr { +namespace experimental::dr { template struct is_owning_view : std::false_type {}; // template @@ -21,9 +21,9 @@ template struct is_owning_view : std::false_type {}; template inline constexpr bool is_owning_view_v = is_owning_view{}; -}; // namespace dr +}; // namespace experimental::dr -namespace dr::shp { +namespace experimental::dr::shp { namespace __detail { @@ -101,7 +101,7 @@ template class zip_accessor { }; template -using zip_iterator = dr::iterator_adaptor>; +using zip_iterator = experimental::dr::iterator_adaptor>; /// zip template @@ -137,8 +137,8 @@ class zip_view : public rng::view_interface> { template decltype(auto) get_view() const { auto &&view = std::get(views_); - if constexpr (dr::is_ref_view_v> || - dr::is_owning_view_v>) { + if constexpr (experimental::dr::is_ref_view_v> || + experimental::dr::is_owning_view_v>) { return view.base(); } else { return view; @@ -148,7 +148,7 @@ class zip_view : public rng::view_interface> { // If there is at least one distributed range, expose segments // of overlapping remote ranges. auto segments() const - requires(dr::distributed_range || ...) + requires(experimental::dr::distributed_range || ...) { std::array segment_ids; std::array local_idx; @@ -178,14 +178,14 @@ class zip_view : public rng::view_interface> { increment_local_idx(segment_ids, local_idx, size); } - return dr::__detail::owning_view(std::move(segment_views)); + return experimental::dr::__detail::owning_view(std::move(segment_views)); } // Return a range corresponding to each segment in `segments()`, // but with a tuple of the constituent ranges instead of a // `zip_view` of the ranges. auto zipped_segments() const - requires(dr::distributed_range || ...) + requires(experimental::dr::distributed_range || ...) { std::array segment_ids; std::array local_idx; @@ -214,11 +214,11 @@ class zip_view : public rng::view_interface> { increment_local_idx(segment_ids, local_idx, size); } - return dr::__detail::owning_view(std::move(segment_views)); + return experimental::dr::__detail::owning_view(std::move(segment_views)); } auto local() const noexcept - requires(!(dr::distributed_range || ...)) + requires(!(experimental::dr::distributed_range || ...)) { return local_impl_(std::make_index_sequence()); } @@ -228,8 +228,8 @@ class zip_view : public rng::view_interface> { // - There are no distributed ranges in the zip // Expose a rank. std::size_t rank() const - requires((dr::remote_range || ...) && - !(dr::distributed_range || ...)) + requires((experimental::dr::remote_range || ...) && + !(experimental::dr::distributed_range || ...)) { return get_rank_impl_<0, Rs...>(); } @@ -242,25 +242,25 @@ class zip_view : public rng::view_interface> { template std::size_t get_rank_impl_() const { static_assert(I < sizeof...(Rs)); - return dr::ranges::rank(get_view()); + return experimental::dr::ranges::rank(get_view()); } template requires(sizeof...(Rs_) > 0) std::size_t get_rank_impl_() const { static_assert(I < sizeof...(Rs)); - if constexpr (dr::remote_range) { - return dr::ranges::rank(get_view()); + if constexpr (experimental::dr::remote_range) { + return experimental::dr::ranges::rank(get_view()); } else { return get_rank_impl_(); } } template auto create_view_impl_(T &&t) const { - if constexpr (dr::remote_range) { - return dr::shp::device_span(std::forward(t)); + if constexpr (experimental::dr::remote_range) { + return experimental::dr::shp::device_span(std::forward(t)); } else { - return dr::shp::span(std::forward(t)); + return experimental::dr::shp::span(std::forward(t)); } } @@ -307,9 +307,9 @@ class zip_view : public rng::view_interface> { rng::begin(std::get(views_))...); } - template + template decltype(auto) segment_or_orig_(T &&t, std::size_t idx) const { - return dr::ranges::segments(t)[idx]; + return experimental::dr::ranges::segments(t)[idx]; } template @@ -341,9 +341,9 @@ namespace views { /// Zip template auto zip(Rs &&...rs) { - return dr::shp::zip_view(std::forward(rs)...); + return experimental::dr::shp::zip_view(std::forward(rs)...); } } // namespace views -} // namespace dr::shp +} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp index 3301bfa0c1e..82a227ef445 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp @@ -4,7 +4,7 @@ #pragma once -namespace dr::views { +namespace experimental::dr::views { // // range-v3 iota uses sentinels that are not the same type as the @@ -24,4 +24,4 @@ struct iota_fn_ { inline constexpr auto iota = iota_fn_{}; -} // namespace dr::views +} // namespace experimental::dr::views diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp index cbf35f084e4..d8a3a23bfc9 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp @@ -11,7 +11,7 @@ #include #include -namespace dr { +namespace experimental::dr { template class transform_iterator { @@ -106,9 +106,9 @@ class transform_iterator { } auto local() const - requires(dr::ranges::__detail::has_local) + requires(experimental::dr::ranges::__detail::has_local) { - auto iter = dr::ranges::__detail::local(iter_); + auto iter = experimental::dr::ranges::__detail::local(iter_); return transform_iterator(iter, fn_); } @@ -135,10 +135,10 @@ class transform_view : public rng::view_interface> { } auto segments() const - requires(dr::distributed_range) + requires(experimental::dr::distributed_range) { auto fn = fn_; - return dr::ranges::segments(base_) | + return experimental::dr::ranges::segments(base_) | rng::views::transform([fn](T &&segment) { return transform_view, F>( std::forward(segment), fn); @@ -146,9 +146,9 @@ class transform_view : public rng::view_interface> { } auto rank() const - requires(dr::remote_range) + requires(experimental::dr::remote_range) { - return dr::ranges::rank(base_); + return experimental::dr::ranges::rank(base_); } V base() const { return base_; } @@ -168,7 +168,7 @@ template class transform_adapter_closure { transform_adapter_closure(F fn) : fn_(fn) {} template auto operator()(R &&r) const { - return dr::transform_view(std::forward(r), fn_); + return experimental::dr::transform_view(std::forward(r), fn_); } template @@ -195,13 +195,13 @@ class transform_fn_ { inline constexpr auto transform = transform_fn_{}; } // namespace views -} // namespace dr +} // namespace experimental::dr #if !defined(DR_SPEC) // Needed to satisfy rng::viewable_range template -inline constexpr bool rng::enable_borrowed_range> = +inline constexpr bool rng::enable_borrowed_range> = true; #endif diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp index 72cf8ea162f..6ba6ef64806 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp @@ -7,12 +7,12 @@ #include #include -namespace dr { +namespace experimental::dr { // returns range: [(rank, element) ...] -auto ranked_view(const dr::distributed_range auto &r) { - auto rank = [](auto &&v) { return dr::ranges::rank(&v); }; +auto ranked_view(const experimental::dr::distributed_range auto &r) { + auto rank = [](auto &&v) { return experimental::dr::ranges::rank(&v); }; return rng::views::zip(rng::views::transform(r, rank), r); } -} // namespace dr +} // namespace experimental::dr diff --git a/test/distributed-ranges/common/counted.cpp b/test/distributed-ranges/common/counted.cpp index a471ddda762..c7e7ef6355e 100644 --- a/test/distributed-ranges/common/counted.cpp +++ b/test/distributed-ranges/common/counted.cpp @@ -72,44 +72,44 @@ TYPED_TEST(Counted, countedOfOneElementHasOneSegmentAndSameRank) { TypeParam dv(10, 77); auto counted_view_result = xhp::views::counted(dv.end() - 1, 1); - auto counted_view_segments = dr::ranges::segments(counted_view_result); - auto dv_segments = dr::ranges::segments(dv); + auto counted_view_segments = experimental::dr::ranges::segments(counted_view_result); + auto dv_segments = experimental::dr::ranges::segments(dv); auto last_segment_index = dv_segments.size() - 1; EXPECT_TRUE(check_segments(counted_view_result)); EXPECT_EQ(rng::size(counted_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(counted_view_segments[0]), - dr::ranges::rank(dv_segments[last_segment_index])); + EXPECT_EQ(experimental::dr::ranges::rank(counted_view_segments[0]), + experimental::dr::ranges::rank(dv_segments[last_segment_index])); } TYPED_TEST(Counted, countedOfFirstSegementHasOneSegmentAndSameRank) { TypeParam dv(123456, 77); - const auto first_seg_size = dr::ranges::segments(dv)[0].size(); + const auto first_seg_size = experimental::dr::ranges::segments(dv)[0].size(); std::size_t bias = 2; // test assumes there are not too many ranks - assert(dr::ranges::segments(dv)[0].size() > bias); + assert(experimental::dr::ranges::segments(dv)[0].size() > bias); auto counted_view_result = xhp::views::counted(dv.begin() + bias, first_seg_size - bias); - auto counted_view_segments = dr::ranges::segments(counted_view_result); + auto counted_view_segments = experimental::dr::ranges::segments(counted_view_result); EXPECT_EQ(rng::size(counted_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(counted_view_segments[0]), - dr::ranges::rank(dr::ranges::segments(dv)[0])); + EXPECT_EQ(experimental::dr::ranges::rank(counted_view_segments[0]), + experimental::dr::ranges::rank(experimental::dr::ranges::segments(dv)[0])); } TYPED_TEST(Counted, countedOfAllButOneSizeHasAllSegmentsWithSameRanks) { TypeParam dv(EVENLY_DIVIDABLE_SIZE, 77); - auto dv_segments = dr::ranges::segments(dv); + auto dv_segments = experimental::dr::ranges::segments(dv); std::size_t bias = 1; // test assumes there are not too many ranks assert(dv_segments[0].size() > bias); auto counted_view_result = xhp::views::counted(dv.begin() + bias, EVENLY_DIVIDABLE_SIZE - bias); - auto counted_view_segments = dr::ranges::segments(counted_view_result); + auto counted_view_segments = experimental::dr::ranges::segments(counted_view_result); EXPECT_EQ(rng::size(dv_segments), rng::size(counted_view_segments)); for (std::size_t i = 0; i < rng::size(dv_segments); ++i) - EXPECT_EQ(dr::ranges::rank(dv_segments[i]), - dr::ranges::rank(counted_view_segments[i])); + EXPECT_EQ(experimental::dr::ranges::rank(dv_segments[i]), + experimental::dr::ranges::rank(counted_view_segments[i])); } diff --git a/test/distributed-ranges/common/distributed_vector.cpp b/test/distributed-ranges/common/distributed_vector.cpp index 721f7c49419..f3362706843 100644 --- a/test/distributed-ranges/common/distributed_vector.cpp +++ b/test/distributed-ranges/common/distributed_vector.cpp @@ -19,11 +19,11 @@ TYPED_TEST(DistributedVectorAllTypes, StaticAsserts) { static_assert(rng::viewable_range); static_assert(std::forward_iterator); - static_assert(dr::distributed_iterator); + static_assert(experimental::dr::distributed_iterator); static_assert(rng::forward_range); static_assert(rng::random_access_range); - static_assert(dr::distributed_contiguous_range); + static_assert(experimental::dr::distributed_contiguous_range); } TYPED_TEST(DistributedVectorAllTypes, getAndPut) { diff --git a/test/distributed-ranges/common/drop.cpp b/test/distributed-ranges/common/drop.cpp index ea13b600214..7fb271d0db3 100644 --- a/test/distributed-ranges/common/drop.cpp +++ b/test/distributed-ranges/common/drop.cpp @@ -78,48 +78,48 @@ TYPED_TEST(Drop, largeDropOfAllButOneHasSameSegmentAndRank) { auto drop_view_result = xhp::views::drop(dv, 123456 - 1); - auto drop_view_segments = dr::ranges::segments(drop_view_result); - auto dv_segments = dr::ranges::segments(dv); + auto drop_view_segments = experimental::dr::ranges::segments(drop_view_result); + auto dv_segments = experimental::dr::ranges::segments(dv); auto last_segment_index = dv_segments.size() - 1; EXPECT_TRUE(check_segments(drop_view_result)); EXPECT_EQ(rng::size(drop_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(drop_view_segments[0]), - dr::ranges::rank(dv_segments[last_segment_index])); + EXPECT_EQ(experimental::dr::ranges::rank(drop_view_segments[0]), + experimental::dr::ranges::rank(dv_segments[last_segment_index])); } TYPED_TEST(Drop, dropOfAllElementsButOneHasOneSegmentAndSameRank) { TypeParam dv(10, 77); auto drop_view_result = xhp::views::drop(dv, 9); - auto drop_view_segments = dr::ranges::segments(drop_view_result); - auto dv_segments = dr::ranges::segments(dv); + auto drop_view_segments = experimental::dr::ranges::segments(drop_view_result); + auto dv_segments = experimental::dr::ranges::segments(dv); auto last_segment_index = dv_segments.size() - 1; EXPECT_TRUE(check_segments(drop_view_result)); EXPECT_EQ(rng::size(drop_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(drop_view_segments[0]), - dr::ranges::rank(dv_segments[last_segment_index])); + EXPECT_EQ(experimental::dr::ranges::rank(drop_view_segments[0]), + experimental::dr::ranges::rank(dv_segments[last_segment_index])); } TYPED_TEST(Drop, dropOfFirstSegementHasSameSegmentsSize) { TypeParam dv(10, 77); - const auto first_seg_size = dr::ranges::segments(dv)[0].size(); + const auto first_seg_size = experimental::dr::ranges::segments(dv)[0].size(); auto drop_view_result = xhp::views::drop(dv, first_seg_size); - auto drop_view_segments = dr::ranges::segments(drop_view_result); - EXPECT_EQ(rng::size(drop_view_segments), dr::ranges::segments(dv).size() - 1); + auto drop_view_segments = experimental::dr::ranges::segments(drop_view_result); + EXPECT_EQ(rng::size(drop_view_segments), experimental::dr::ranges::segments(dv).size() - 1); } TYPED_TEST(Drop, dropOfOneElementHasAllSegmentsWithSameRanks) { TypeParam dv(EVENLY_DIVIDABLE_SIZE, 77); - auto dv_segments = dr::ranges::segments(dv); + auto dv_segments = experimental::dr::ranges::segments(dv); auto drop_view_result = xhp::views::drop(dv, 1); - auto drop_view_segments = dr::ranges::segments(drop_view_result); + auto drop_view_segments = experimental::dr::ranges::segments(drop_view_result); EXPECT_EQ(rng::size(dv_segments), rng::size(drop_view_segments)); for (std::size_t i = 0; i < rng::size(dv_segments); ++i) - EXPECT_EQ(dr::ranges::rank(dv_segments[i]), - dr::ranges::rank(drop_view_segments[i])); + EXPECT_EQ(experimental::dr::ranges::rank(dv_segments[i]), + experimental::dr::ranges::rank(drop_view_segments[i])); } diff --git a/test/distributed-ranges/common/iota_view.cpp b/test/distributed-ranges/common/iota_view.cpp index 39242a101f2..941d30b419d 100644 --- a/test/distributed-ranges/common/iota_view.cpp +++ b/test/distributed-ranges/common/iota_view.cpp @@ -12,7 +12,7 @@ TYPED_TEST_SUITE(IotaView, AllTypes); TYPED_TEST(IotaView, ZipWithDR) { xhp::distributed_vector dv(10); - auto v = dr::views::iota(1, 10); + auto v = experimental::dr::views::iota(1, 10); auto z = xhp::views::zip(dv, v); @@ -26,7 +26,7 @@ TYPED_TEST(IotaView, ZipWithDR) { TYPED_TEST(IotaView, Copy) { TypeParam dv(10); - auto v = dr::views::iota(1, 11); + auto v = experimental::dr::views::iota(1, 11); xhp::copy(v, dv.begin()); @@ -36,7 +36,7 @@ TYPED_TEST(IotaView, Copy) { TYPED_TEST(IotaView, Transform) { TypeParam dv(10); - auto v = dr::views::iota(1, 11); + auto v = experimental::dr::views::iota(1, 11); auto negate = [](auto v) { return -v; }; xhp::transform(v, dv.begin(), negate); @@ -47,7 +47,7 @@ TYPED_TEST(IotaView, Transform) { TYPED_TEST(IotaView, ForEach) { TypeParam dv(10); - auto v = dr::views::iota(1, 11); + auto v = experimental::dr::views::iota(1, 11); auto negate = [](auto v) { auto &[in, out] = v; diff --git a/test/distributed-ranges/common/sycl_utils.cpp b/test/distributed-ranges/common/sycl_utils.cpp index 2e4c58bcc81..99d4aab68a1 100644 --- a/test/distributed-ranges/common/sycl_utils.cpp +++ b/test/distributed-ranges/common/sycl_utils.cpp @@ -20,7 +20,7 @@ TEST(SYCLUtils, ParalelFor1D) { auto seta = [a](auto i) { a[i] = i; }; auto setb = [b](auto i) { b[i] = i; }; q.parallel_for(range, seta).wait(); - dr::__detail::parallel_for(q, range, setb).wait(); + experimental::dr::__detail::parallel_for(q, range, setb).wait(); EXPECT_EQ(rng::span(a, size), rng::span(b, size)); } @@ -45,7 +45,7 @@ TEST(SYCLUtils, ParalelFor2D) { auto setb = [mdb](auto index) { mdb(index[0], index[1]) = 22; }; q.parallel_for(range, seta).wait(); - dr::__detail::parallel_for(q, range, setb).wait(); + experimental::dr::__detail::parallel_for(q, range, setb).wait(); EXPECT_EQ(rng::span(a, size), rng::span(b, size)) << fmt::format("a:\n{}b:\n{}", mda, mdb); @@ -68,7 +68,7 @@ TEST(SYCLUtils, ParalelFor3D) { auto setb = [mdb](auto index) { mdb(index[0], index[1], index[2]) = 22; }; q.parallel_for(range, seta).wait(); - dr::__detail::parallel_for(q, range, setb).wait(); + experimental::dr::__detail::parallel_for(q, range, setb).wait(); EXPECT_EQ(rng::span(a, size), rng::span(b, size)) << fmt::format("a:\n{}b:\n{}", mda, mdb); diff --git a/test/distributed-ranges/common/take.cpp b/test/distributed-ranges/common/take.cpp index 114b1f01e36..a4c6ef2b0fc 100644 --- a/test/distributed-ranges/common/take.cpp +++ b/test/distributed-ranges/common/take.cpp @@ -73,38 +73,38 @@ TYPED_TEST(Take, takeOfOneElementHasOneSegmentAndSameRank) { TypeParam dv(10, 77); auto take_view_result = xhp::views::take(dv, 1); - auto take_view_segments = dr::ranges::segments(take_view_result); - auto dv_segments = dr::ranges::segments(dv); + auto take_view_segments = experimental::dr::ranges::segments(take_view_result); + auto dv_segments = experimental::dr::ranges::segments(dv); EXPECT_TRUE(check_segments(take_view_result)); EXPECT_EQ(rng::size(take_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(take_view_segments[0]), - dr::ranges::rank(dv_segments[0])); + EXPECT_EQ(experimental::dr::ranges::rank(take_view_segments[0]), + experimental::dr::ranges::rank(dv_segments[0])); } TYPED_TEST(Take, takeOfFirstSegementHasOneSegmentAndSameRank) { TypeParam dv(10, 77); - const auto first_seg_size = dr::ranges::segments(dv)[0].size(); + const auto first_seg_size = experimental::dr::ranges::segments(dv)[0].size(); auto take_view_result = xhp::views::take(dv, first_seg_size); - auto take_view_segments = dr::ranges::segments(take_view_result); + auto take_view_segments = experimental::dr::ranges::segments(take_view_result); EXPECT_EQ(rng::size(take_view_segments), 1); - EXPECT_EQ(dr::ranges::rank(take_view_segments[0]), - dr::ranges::rank(dr::ranges::segments(dv)[0])); + EXPECT_EQ(experimental::dr::ranges::rank(take_view_segments[0]), + experimental::dr::ranges::rank(experimental::dr::ranges::segments(dv)[0])); } template void takeHasSameSegments(std::size_t dv_size, std::size_t take_size) { TypeParam dv(dv_size, 77); - auto dv_segments = dr::ranges::segments(dv); + auto dv_segments = experimental::dr::ranges::segments(dv); auto take_view_result = xhp::views::take(dv, take_size); - auto take_view_segments = dr::ranges::segments(take_view_result); + auto take_view_segments = experimental::dr::ranges::segments(take_view_result); EXPECT_EQ(rng::size(dv_segments), rng::size(take_view_segments)); for (std::size_t i = 0; i < rng::size(dv_segments); ++i) - EXPECT_EQ(dr::ranges::rank(dv_segments[i]), - dr::ranges::rank(take_view_segments[i])); + EXPECT_EQ(experimental::dr::ranges::rank(dv_segments[i]), + experimental::dr::ranges::rank(take_view_segments[i])); } TYPED_TEST(Take, takeOfAllButOneSizeHasAllSegmentsWithSameRanks) { diff --git a/test/distributed-ranges/common/zip.cpp b/test/distributed-ranges/common/zip.cpp index 8c108c2cc63..d363807da02 100644 --- a/test/distributed-ranges/common/zip.cpp +++ b/test/distributed-ranges/common/zip.cpp @@ -55,7 +55,7 @@ TYPED_TEST(Zip, RangeSegments) { auto local = rng::views::zip(ops.vec); auto dist = test_zip(ops.dist_vec); - auto flat = rng::views::join(dr::ranges::segments(dist)); + auto flat = rng::views::join(experimental::dr::ranges::segments(dist)); EXPECT_TRUE(is_equal(local, flat)); } @@ -66,7 +66,7 @@ TYPED_TEST(Zip, IterSegments) { auto local = rng::views::zip(ops.vec); auto dist = test_zip(ops.dist_vec); - auto flat = rng::views::join(dr::ranges::segments(dist.begin())); + auto flat = rng::views::join(experimental::dr::ranges::segments(dist.begin())); EXPECT_TRUE(is_equal(local, flat)); } #endif @@ -77,7 +77,7 @@ TYPED_TEST(Zip, Drop) { auto local = rng::views::drop(rng::views::zip(ops.vec), 2); auto dist = xhp::views::drop(test_zip(ops.dist_vec), 2); - auto flat = rng::views::join(dr::ranges::segments(dist)); + auto flat = rng::views::join(experimental::dr::ranges::segments(dist)); EXPECT_EQ(local, dist); EXPECT_TRUE(is_equal(local, flat)); } @@ -179,7 +179,7 @@ TYPED_TEST(Zip, IotaStaticAssert) { static_assert(std::forward_iterator); using Dist = decltype(dist); static_assert(rng::forward_range); - static_assert(dr::distributed_range); + static_assert(experimental::dr::distributed_range); } TYPED_TEST(Zip, Iota) { diff --git a/test/distributed-ranges/include/common-tests.hpp b/test/distributed-ranges/include/common-tests.hpp index dda5be91531..4204107fac2 100644 --- a/test/distributed-ranges/include/common-tests.hpp +++ b/test/distributed-ranges/include/common-tests.hpp @@ -6,7 +6,7 @@ constexpr std::size_t EVENLY_DIVIDABLE_SIZE = 2 * 3 * 5 * 7 * 11 * 13; // good up to 16 processes -template +template using LocalVec = std::vector; struct AOS_Struct { @@ -174,7 +174,7 @@ bool contains_empty(auto &&r) { } std::string check_segments_message(auto &&r) { - auto segments = dr::ranges::segments(r); + auto segments = experimental::dr::ranges::segments(r); auto flat = rng::views::join(segments); if (contains_empty(segments) || !is_equal(r, flat)) { return fmt::format("\n" @@ -254,7 +254,7 @@ auto check_binary_check_op(rng::range auto &&a, rng::range auto &&b, } auto check_segments(std::forward_iterator auto di) { - auto segments = dr::ranges::segments(di); + auto segments = experimental::dr::ranges::segments(di); auto flat = rng::join_view(segments); if (contains_empty(segments) || !is_equal(di, flat)) { return testing::AssertionFailure() @@ -295,7 +295,7 @@ concept streamable = requires(std::ostream &os, T value) { { os << value } -> std::convertible_to; }; -namespace dr::mhp { +namespace experimental::dr::mhp { // gtest relies on ADL to find the printer template @@ -325,9 +325,9 @@ bool operator==(const xhp::distributed_vector &dist_vec, return is_equal(local_vec, dist_vec); } -} // namespace dr::mhp +} // namespace experimental::dr::mhp -namespace dr::shp { +namespace experimental::dr::shp { // gtest relies on ADL to find the printer template @@ -357,7 +357,7 @@ bool operator==(const xhp::distributed_vector &dist_vec, return is_equal(dist_vec, local_vec); } -} // namespace dr::shp +} // namespace experimental::dr::shp namespace DR_RANGES_NAMESPACE { diff --git a/test/distributed-ranges/shp/CMakeLists.txt b/test/distributed-ranges/shp/CMakeLists.txt index 17fa366ef16..52e4f76bfae 100644 --- a/test/distributed-ranges/shp/CMakeLists.txt +++ b/test/distributed-ranges/shp/CMakeLists.txt @@ -32,7 +32,7 @@ find_package(MKL REQUIRED) # include(GoogleTest) add_library(dr_shp INTERFACE) -add_library(DR::shp ALIAS dr_shp) +add_library(experimental::dr::shp ALIAS dr_shp) target_include_directories(dr_shp INTERFACE . vendor ../../../include) target_compile_definitions(dr_shp INTERFACE USE_MKL @@ -54,12 +54,8 @@ if($(HIPSYCL_TARGETS)) target_compile_options(dr_shp INTERFACE --hipsycl-targets='cuda:sm_75') endif() - set(CMAKE_INCLUDE_CURRENT_DIR ON) - - - add_executable( shp-tests shp-tests.cpp ../common/all.cpp ../common/copy.cpp ../common/counted.cpp @@ -74,11 +70,12 @@ add_executable( add_executable(shp-tests-3 shp-tests.cpp containers-3.cpp copy-3.cpp) # skeleton for rapid builds of individual tests, feel free to change this -add_executable(shp-quick-test shp-tests.cpp ../common/transform.cpp) -target_compile_definitions(shp-quick-test PRIVATE QUICK_TEST) +# add_executable(shp-quick-test shp-tests.cpp ../common/transform.cpp) +# target_compile_definitions(shp-quick-test PRIVATE QUICK_TEST) -foreach(test-exec IN ITEMS shp-tests shp-tests-3 shp-quick-test) - target_link_libraries(${test-exec} GTest::gtest_main DR::shp fmt::fmt cxxopts) +#foreach(test-exec IN ITEMS shp-tests shp-tests-3 shp-quick-test) +foreach(test-exec IN ITEMS shp-tests shp-tests-3) + target_link_libraries(${test-exec} GTest::gtest_main experimental::dr::shp fmt::fmt cxxopts) endforeach() add_custom_target(shp-all-tests) diff --git a/test/distributed-ranges/shp/algorithms.cpp b/test/distributed-ranges/shp/algorithms.cpp index eeea46d5663..3fe5839d6fe 100644 --- a/test/distributed-ranges/shp/algorithms.cpp +++ b/test/distributed-ranges/shp/algorithms.cpp @@ -5,7 +5,7 @@ #include "xhp-tests.hpp" using T = int; -using DV = dr::shp::distributed_vector>; +using DV = experimental::dr::shp::distributed_vector>; using V = std::vector; TEST(ShpTests, InclusiveScan_aligned) { @@ -13,17 +13,17 @@ TEST(ShpTests, InclusiveScan_aligned) { // With execution Policy { - dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> v(n); std::vector lv(n); // Range case, no binary op or init, perfectly aligned for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(dr::shp::par_unseq, v, v); + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v, v); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -33,10 +33,10 @@ TEST(ShpTests, InclusiveScan_aligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), v.begin()); + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), v.begin()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -45,17 +45,17 @@ TEST(ShpTests, InclusiveScan_aligned) { // Without execution policies { - dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> v(n); std::vector lv(n); // Range case, no binary op or init, perfectly aligned for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(v, v); + experimental::dr::shp::inclusive_scan(v, v); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -65,10 +65,10 @@ TEST(ShpTests, InclusiveScan_aligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(v.begin(), v.end(), v.begin()); + experimental::dr::shp::inclusive_scan(v.begin(), v.end(), v.begin()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -81,8 +81,8 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { // With execution policies { - dr::shp::distributed_vector> v(n); - dr::shp::distributed_vector> o( + experimental::dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> o( v.size() * 2); std::vector lv(n); @@ -90,10 +90,10 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(dr::shp::par_unseq, v, o, std::plus<>()); + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v, o, std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -103,11 +103,11 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin(), std::multiplies<>(), 12); - dr::shp::inclusive_scan(dr::shp::par_unseq, v, o, std::multiplies<>(), 12); + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v, o, std::multiplies<>(), 12); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -117,10 +117,10 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), o.begin(), + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), o.begin(), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { @@ -131,11 +131,11 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin(), std::multiplies<>(), 12); - dr::shp::inclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), o.begin(), + experimental::dr::shp::inclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), o.begin(), std::multiplies<>(), 12); for (std::size_t i = 0; i < lv.size(); i++) { @@ -145,8 +145,8 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { // Without execution policies { - dr::shp::distributed_vector> v(n); - dr::shp::distributed_vector> o( + experimental::dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> o( v.size() * 2); std::vector lv(n); @@ -154,10 +154,10 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(v, o, std::plus<>()); + experimental::dr::shp::inclusive_scan(v, o, std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -167,11 +167,11 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin(), std::multiplies<>(), 12); - dr::shp::inclusive_scan(v, o, std::multiplies<>(), 12); + experimental::dr::shp::inclusive_scan(v, o, std::multiplies<>(), 12); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -181,10 +181,10 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin()); - dr::shp::inclusive_scan(v.begin(), v.end(), o.begin(), std::plus<>()); + experimental::dr::shp::inclusive_scan(v.begin(), v.end(), o.begin(), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -194,11 +194,11 @@ TEST(ShpTests, DISABLED_InclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::inclusive_scan(lv.begin(), lv.end(), lv.begin(), std::multiplies<>(), 12); - dr::shp::inclusive_scan(v.begin(), v.end(), o.begin(), std::multiplies<>(), + experimental::dr::shp::inclusive_scan(v.begin(), v.end(), o.begin(), std::multiplies<>(), 12); for (std::size_t i = 0; i < lv.size(); i++) { @@ -212,17 +212,17 @@ TEST(ShpTests, ExclusiveScan_aligned) { // With execution Policy { - dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> v(n); std::vector lv(n); // Range case, no binary op or init, perfectly aligned for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(dr::shp::par_unseq, v, v, int(0)); + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v, v, int(0)); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -232,10 +232,10 @@ TEST(ShpTests, ExclusiveScan_aligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), v.begin(), + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), v.begin(), int(0)); for (std::size_t i = 0; i < lv.size(); i++) { @@ -245,17 +245,17 @@ TEST(ShpTests, ExclusiveScan_aligned) { // Without execution policies { - dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> v(n); std::vector lv(n); // Range case, no binary op or init, perfectly aligned for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(v, v, int(0)); + experimental::dr::shp::exclusive_scan(v, v, int(0)); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -265,10 +265,10 @@ TEST(ShpTests, ExclusiveScan_aligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(v.begin(), v.end(), v.begin(), int(0)); + experimental::dr::shp::exclusive_scan(v.begin(), v.end(), v.begin(), int(0)); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], v[i]); @@ -281,8 +281,8 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { // With execution policies { - dr::shp::distributed_vector> v(n); - dr::shp::distributed_vector> o( + experimental::dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> o( v.size() * 2); std::vector lv(n); @@ -290,10 +290,10 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(dr::shp::par_unseq, v, o, int(0), std::plus<>()); + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v, o, int(0), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -303,11 +303,11 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), 12, std::multiplies<>()); - dr::shp::exclusive_scan(dr::shp::par_unseq, v, o, 12, std::multiplies<>()); + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v, o, 12, std::multiplies<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -317,10 +317,10 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), o.begin(), + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), o.begin(), int(0), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { @@ -331,11 +331,11 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(12), std::multiplies<>()); - dr::shp::exclusive_scan(dr::shp::par_unseq, v.begin(), v.end(), o.begin(), + experimental::dr::shp::exclusive_scan(experimental::dr::shp::par_unseq, v.begin(), v.end(), o.begin(), int(12), std::multiplies<>()); for (std::size_t i = 0; i < lv.size(); i++) { @@ -345,8 +345,8 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { // Without execution policies { - dr::shp::distributed_vector> v(n); - dr::shp::distributed_vector> o( + experimental::dr::shp::distributed_vector> v(n); + experimental::dr::shp::distributed_vector> o( v.size() * 2); std::vector lv(n); @@ -354,10 +354,10 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(12)); - dr::shp::exclusive_scan(v, o, int(12), std::plus<>()); + experimental::dr::shp::exclusive_scan(v, o, int(12), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -367,11 +367,11 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), 12, std::multiplies<>()); - dr::shp::exclusive_scan(v, o, 12, std::multiplies<>()); + experimental::dr::shp::exclusive_scan(v, o, 12, std::multiplies<>()); for (std::size_t i = 0; i < lv.size(); i++) { EXPECT_EQ(lv[i], o[i]); @@ -381,10 +381,10 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), int(0)); - dr::shp::exclusive_scan(v.begin(), v.end(), o.begin(), int(0), + experimental::dr::shp::exclusive_scan(v.begin(), v.end(), o.begin(), int(0), std::plus<>()); for (std::size_t i = 0; i < lv.size(); i++) { @@ -395,11 +395,11 @@ TEST(ShpTests, DISABLED_ExclusiveScan_nonaligned) { for (auto &&x : lv) { x = lrand48() % 100; } - dr::shp::copy(lv.begin(), lv.end(), v.begin()); + experimental::dr::shp::copy(lv.begin(), lv.end(), v.begin()); std::exclusive_scan(lv.begin(), lv.end(), lv.begin(), 12, std::multiplies<>()); - dr::shp::exclusive_scan(v.begin(), v.end(), o.begin(), 12, + experimental::dr::shp::exclusive_scan(v.begin(), v.end(), o.begin(), 12, std::multiplies<>()); for (std::size_t i = 0; i < lv.size(); i++) { @@ -414,16 +414,16 @@ TEST(ShpTests, Sort) { for (std::size_t n : sizes) { std::vector l_v = generate_random(n, 100); - dr::shp::distributed_vector d_v(n); + experimental::dr::shp::distributed_vector d_v(n); - dr::shp::copy(l_v.begin(), l_v.end(), d_v.begin()); + experimental::dr::shp::copy(l_v.begin(), l_v.end(), d_v.begin()); std::sort(l_v.begin(), l_v.end()); - dr::shp::sort(d_v); + experimental::dr::shp::sort(d_v); std::vector d_v_l(n); - dr::shp::copy(d_v.begin(), d_v.end(), d_v_l.begin()); + experimental::dr::shp::copy(d_v.begin(), d_v.end(), d_v_l.begin()); for (std::size_t i = 0; i < l_v.size(); i++) { EXPECT_EQ(l_v[i], d_v_l[i]); diff --git a/test/distributed-ranges/shp/containers-3.cpp b/test/distributed-ranges/shp/containers-3.cpp index b42d7796e2e..6d09b96ae94 100644 --- a/test/distributed-ranges/shp/containers-3.cpp +++ b/test/distributed-ranges/shp/containers-3.cpp @@ -6,8 +6,8 @@ TYPED_TEST_SUITE(DistributedVectorTest, AllocatorTypes); TYPED_TEST(DistributedVectorTest, tests_from_this_file_run_on_3_devices) { - EXPECT_EQ(dr::shp::nprocs(), 3); - EXPECT_EQ(std::size(dr::shp::devices()), 3); + EXPECT_EQ(experimental::dr::shp::nprocs(), 3); + EXPECT_EQ(std::size(experimental::dr::shp::devices()), 3); } TYPED_TEST(DistributedVectorTest, segments_sizes_in_uneven_distribution) { diff --git a/test/distributed-ranges/shp/containers.cpp b/test/distributed-ranges/shp/containers.cpp index f32a4bc9cf5..96af37aa29f 100644 --- a/test/distributed-ranges/shp/containers.cpp +++ b/test/distributed-ranges/shp/containers.cpp @@ -23,7 +23,7 @@ TYPED_TEST(DistributedVectorTest, std::iota(dv.begin(), dv.end(), 20); auto second = dv.begin() + 2; - EXPECT_EQ(second[0], dr::ranges::segments(second)[0][0]); + EXPECT_EQ(second[0], experimental::dr::ranges::segments(second)[0][0]); } TYPED_TEST(DistributedVectorTest, fill_constructor) { @@ -87,7 +87,7 @@ TYPED_TEST(DistributedVectorTest, Iterator) { TYPED_TEST(DistributedVectorTest, Resize) { std::size_t size = 100; typename TestFixture::DistVec dv(size); - dr::shp::iota(dv.begin(), dv.end(), 20); + experimental::dr::shp::iota(dv.begin(), dv.end(), 20); typename TestFixture::LocalVec v(size); std::iota(v.begin(), v.end(), 20); @@ -107,11 +107,11 @@ TYPED_TEST(DistributedVectorTest, Resize) { template class DeviceVectorTest : public testing::Test { public: - using DeviceVec = dr::shp::device_vector; + using DeviceVec = experimental::dr::shp::device_vector; }; TYPED_TEST_SUITE(DeviceVectorTest, AllocatorTypes); TYPED_TEST(DeviceVectorTest, is_remote_contiguous_range) { - static_assert(dr::remote_contiguous_range); + static_assert(experimental::dr::remote_contiguous_range); } diff --git a/test/distributed-ranges/shp/containers.hpp b/test/distributed-ranges/shp/containers.hpp index e14c42b52dc..24d2c46be37 100644 --- a/test/distributed-ranges/shp/containers.hpp +++ b/test/distributed-ranges/shp/containers.hpp @@ -7,6 +7,6 @@ template class DistributedVectorTest : public testing::Test { public: using DistVec = - dr::shp::distributed_vector; + experimental::dr::shp::distributed_vector; using LocalVec = std::vector; }; diff --git a/test/distributed-ranges/shp/copy-3.cpp b/test/distributed-ranges/shp/copy-3.cpp index 877e843871a..b330f8cfd3a 100644 --- a/test/distributed-ranges/shp/copy-3.cpp +++ b/test/distributed-ranges/shp/copy-3.cpp @@ -6,8 +6,8 @@ TYPED_TEST_SUITE(CopyTest, AllocatorTypes); TYPED_TEST(CopyTest, tests_from_this_file_run_on_3_devices) { - EXPECT_EQ(dr::shp::nprocs(), 3); - EXPECT_EQ(rng::size(dr::shp::devices()), 3); + EXPECT_EQ(experimental::dr::shp::nprocs(), 3); + EXPECT_EQ(rng::size(experimental::dr::shp::devices()), 3); } TYPED_TEST(CopyTest, dist2local_wholesegment) { @@ -16,7 +16,7 @@ TYPED_TEST(CopyTest, dist2local_wholesegment) { 7, 8, 9, 10, 11, 12}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0}; - auto ret_it = dr::shp::copy(rng::begin(dist_vec) + 4, + auto ret_it = experimental::dr::shp::copy(rng::begin(dist_vec) + 4, rng::begin(dist_vec) + 8, rng::begin(local_vec)); EXPECT_TRUE(equal(local_vec, typename TestFixture::LocalVec{5, 6, 7, 8})); EXPECT_EQ(ret_it, rng::end(local_vec)); @@ -27,7 +27,7 @@ TYPED_TEST(CopyTest, local2dist_wholesegment) { const typename TestFixture::LocalVec local_vec = {50, 60, 70, 80}; typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; - auto ret_it = dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), + auto ret_it = experimental::dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec) + 4); EXPECT_TRUE(equal(dist_vec, typename TestFixture::LocalVec{ 1, 2, 3, 4, 50, 60, 70, 80, 9, 10, 11, 12})); diff --git a/test/distributed-ranges/shp/copy.cpp b/test/distributed-ranges/shp/copy.cpp index 47744e41d95..6076a4e03cb 100644 --- a/test/distributed-ranges/shp/copy.cpp +++ b/test/distributed-ranges/shp/copy.cpp @@ -8,7 +8,7 @@ TYPED_TEST_SUITE(CopyTest, AllocatorTypes); TYPED_TEST(CopyTest, dist2local_async) { const typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0}; - dr::shp::copy_async(rng::begin(dist_vec), rng::end(dist_vec), + experimental::dr::shp::copy_async(rng::begin(dist_vec), rng::end(dist_vec), rng::begin(local_vec)) .wait(); EXPECT_TRUE(equal(local_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5})); @@ -17,7 +17,7 @@ TYPED_TEST(CopyTest, dist2local_async) { TYPED_TEST(CopyTest, local2dist_async) { const typename TestFixture::LocalVec local_vec = {1, 2, 3, 4, 5}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0}; - dr::shp::copy_async(rng::begin(local_vec), rng::end(local_vec), + experimental::dr::shp::copy_async(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec)) .wait(); EXPECT_TRUE(equal(dist_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5})); @@ -26,7 +26,7 @@ TYPED_TEST(CopyTest, local2dist_async) { TYPED_TEST(CopyTest, dist2local_sync) { const typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 9}; - auto ret_it = dr::shp::copy(rng::begin(dist_vec), rng::end(dist_vec), + auto ret_it = experimental::dr::shp::copy(rng::begin(dist_vec), rng::end(dist_vec), rng::begin(local_vec)); EXPECT_TRUE( equal(local_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5, 9})); @@ -36,7 +36,7 @@ TYPED_TEST(CopyTest, dist2local_sync) { TYPED_TEST(CopyTest, local2dist_sync) { const typename TestFixture::LocalVec local_vec = {1, 2, 3, 4, 5}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 9}; - auto ret_it = dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), + auto ret_it = experimental::dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec)); EXPECT_TRUE( equal(dist_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5, 9})); @@ -46,7 +46,7 @@ TYPED_TEST(CopyTest, local2dist_sync) { TYPED_TEST(CopyTest, dist2local_range_sync) { const typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 9}; - auto ret_it = dr::shp::copy(dist_vec, rng::begin(local_vec)); + auto ret_it = experimental::dr::shp::copy(dist_vec, rng::begin(local_vec)); EXPECT_TRUE( equal(local_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5, 9})); EXPECT_EQ(*ret_it, 9); @@ -55,7 +55,7 @@ TYPED_TEST(CopyTest, dist2local_range_sync) { TYPED_TEST(CopyTest, local2dist_range_sync) { const typename TestFixture::LocalVec local_vec = {1, 2, 3, 4, 5}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 9}; - auto ret_it = dr::shp::copy(local_vec, rng::begin(dist_vec)); + auto ret_it = experimental::dr::shp::copy(local_vec, rng::begin(dist_vec)); EXPECT_TRUE( equal(dist_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5, 9})); EXPECT_EQ(*ret_it, 9); @@ -65,10 +65,10 @@ TYPED_TEST(CopyTest, dist2local_async_can_interleave) { const typename TestFixture::DistVec dist_vec = {1, 2, 3, 4, 5}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 0, 0, 0}; auto event_1 = - dr::shp::copy_async(rng::begin(dist_vec) + 0, rng::begin(dist_vec) + 4, + experimental::dr::shp::copy_async(rng::begin(dist_vec) + 0, rng::begin(dist_vec) + 4, rng::begin(local_vec) + 0); auto event_2 = - dr::shp::copy_async(rng::begin(dist_vec) + 1, rng::begin(dist_vec) + 5, + experimental::dr::shp::copy_async(rng::begin(dist_vec) + 1, rng::begin(dist_vec) + 5, rng::begin(local_vec) + 4); event_1.wait(); event_2.wait(); @@ -80,9 +80,9 @@ TYPED_TEST(CopyTest, local2dist_async_can_interleave) { const typename TestFixture::LocalVec local_vec_1 = {1, 2, 3}; const typename TestFixture::LocalVec local_vec_2 = {4, 5}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0}; - auto event_1 = dr::shp::copy_async( + auto event_1 = experimental::dr::shp::copy_async( rng::begin(local_vec_1), rng::end(local_vec_1), rng::begin(dist_vec)); - auto event_2 = dr::shp::copy_async( + auto event_2 = experimental::dr::shp::copy_async( rng::begin(local_vec_2), rng::end(local_vec_2), rng::begin(dist_vec) + 3); event_1.wait(); event_2.wait(); @@ -94,7 +94,7 @@ TYPED_TEST(CopyTest, dist2local_sliced_bothSides) { 6, 7, 8, 9, 10}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(dist_vec) + 1, rng::end(dist_vec) - 1, + experimental::dr::shp::copy(rng::begin(dist_vec) + 1, rng::end(dist_vec) - 1, rng::begin(local_vec)); EXPECT_TRUE(equal( local_vec, typename TestFixture::LocalVec{2, 3, 4, 5, 6, 7, 8, 9, 0, 0})); @@ -105,7 +105,7 @@ TYPED_TEST(CopyTest, dist2local_sliced_left) { 6, 7, 8, 9, 10}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(dist_vec) + 1, rng::end(dist_vec), + experimental::dr::shp::copy(rng::begin(dist_vec) + 1, rng::end(dist_vec), rng::begin(local_vec)); EXPECT_TRUE(equal(local_vec, typename TestFixture::LocalVec{2, 3, 4, 5, 6, 7, 8, 9, 10, 0})); @@ -116,7 +116,7 @@ TYPED_TEST(CopyTest, dist2local_sliced_right) { 6, 7, 8, 9, 10}; typename TestFixture::LocalVec local_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(dist_vec), rng::end(dist_vec) - 1, + experimental::dr::shp::copy(rng::begin(dist_vec), rng::end(dist_vec) - 1, rng::begin(local_vec)); EXPECT_TRUE(equal( local_vec, typename TestFixture::LocalVec{1, 2, 3, 4, 5, 6, 7, 8, 9, 0})); @@ -126,7 +126,7 @@ TYPED_TEST(CopyTest, local2dist_sliced_bothSides) { const typename TestFixture::LocalVec local_vec = {2, 3, 4, 5, 6, 7, 8, 9}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), + experimental::dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec) + 1); EXPECT_TRUE(equal( dist_vec, typename TestFixture::LocalVec{0, 2, 3, 4, 5, 6, 7, 8, 9, 0})); @@ -136,7 +136,7 @@ TYPED_TEST(CopyTest, local2dist_sliced_left) { const typename TestFixture::LocalVec local_vec = {2, 3, 4, 5, 6, 7, 8, 9}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), + experimental::dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec) + 2); EXPECT_TRUE(equal( dist_vec, typename TestFixture::LocalVec{0, 0, 2, 3, 4, 5, 6, 7, 8, 9})); @@ -146,7 +146,7 @@ TYPED_TEST(CopyTest, local2dist_sliced_right) { const typename TestFixture::LocalVec local_vec = {2, 3, 4, 5, 6, 7, 8, 9}; typename TestFixture::DistVec dist_vec = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), + experimental::dr::shp::copy(rng::begin(local_vec), rng::end(local_vec), rng::begin(dist_vec)); EXPECT_TRUE(equal( dist_vec, typename TestFixture::LocalVec{2, 3, 4, 5, 6, 7, 8, 9, 0, 0})); diff --git a/test/distributed-ranges/shp/copy.hpp b/test/distributed-ranges/shp/copy.hpp index babf02f9958..6d072a6a271 100644 --- a/test/distributed-ranges/shp/copy.hpp +++ b/test/distributed-ranges/shp/copy.hpp @@ -7,6 +7,6 @@ template class CopyTest : public testing::Test { public: using DistVec = - dr::shp::distributed_vector; + experimental::dr::shp::distributed_vector; using LocalVec = std::vector; }; diff --git a/test/distributed-ranges/shp/detail.cpp b/test/distributed-ranges/shp/detail.cpp index 145fad179cb..b02240212c0 100644 --- a/test/distributed-ranges/shp/detail.cpp +++ b/test/distributed-ranges/shp/detail.cpp @@ -5,7 +5,7 @@ #include "xhp-tests.hpp" #include -namespace shp = dr::shp; +namespace shp = experimental::dr::shp; TEST(DetailTest, parallel_for) { std::size_t size = 2 * 1024 * 1024; @@ -26,7 +26,7 @@ TEST(DetailTest, parallel_for) { auto dv = dvec.data(); - dr::__detail::parallel_for(q, n, [=](auto i) { + experimental::dr::__detail::parallel_for(q, n, [=](auto i) { sycl::atomic_ref v(dv[i % size]); diff --git a/test/distributed-ranges/shp/fill.cpp b/test/distributed-ranges/shp/fill.cpp index b21b4390d55..d08b2c8c777 100644 --- a/test/distributed-ranges/shp/fill.cpp +++ b/test/distributed-ranges/shp/fill.cpp @@ -6,7 +6,7 @@ template class FillTest : public testing::Test { public: using DistVec = - dr::shp::distributed_vector; + experimental::dr::shp::distributed_vector; using LocalVec = std::vector; }; @@ -18,7 +18,7 @@ TYPED_TEST(FillTest, fill_all) { auto segments = dist_vec.segments(); int value = 1; for (auto &&segment : segments) { - dr::shp::fill(segment.begin(), segment.end(), value); + experimental::dr::shp::fill(segment.begin(), segment.end(), value); } EXPECT_TRUE(equal( dist_vec, typename TestFixture::DistVec{1, 1, 1, 1, 1, 1, 1, 1, 1, 1})); diff --git a/test/distributed-ranges/shp/gemv.cpp b/test/distributed-ranges/shp/gemv.cpp index 11cc779ff89..b0d5dce4195 100644 --- a/test/distributed-ranges/shp/gemv.cpp +++ b/test/distributed-ranges/shp/gemv.cpp @@ -8,19 +8,19 @@ TEST(SparseMatrix, Gemv) { std::size_t m = 100; std::size_t k = 100; - dr::shp::sparse_matrix a( + experimental::dr::shp::sparse_matrix a( {m, k}, 0.1f, - dr::shp::block_cyclic({dr::shp::tile::div, dr::shp::tile::div}, - {dr::shp::nprocs(), 1})); + experimental::dr::shp::block_cyclic({experimental::dr::shp::tile::div, experimental::dr::shp::tile::div}, + {experimental::dr::shp::nprocs(), 1})); - dr::shp::distributed_vector b(k, 1.f); - dr::shp::distributed_vector c(m, 0.f); + experimental::dr::shp::distributed_vector b(k, 1.f); + experimental::dr::shp::distributed_vector c(m, 0.f); - dr::shp::gemv(c, a, b); + experimental::dr::shp::gemv(c, a, b); std::vector c_local(m); - dr::shp::copy(c.begin(), c.end(), c_local.begin()); + experimental::dr::shp::copy(c.begin(), c.end(), c_local.begin()); std::vector c_ref(m, 0.f); diff --git a/test/distributed-ranges/shp/shp-tests.cpp b/test/distributed-ranges/shp/shp-tests.cpp index d8856337ef5..894b9c0f37e 100644 --- a/test/distributed-ranges/shp/shp-tests.cpp +++ b/test/distributed-ranges/shp/shp-tests.cpp @@ -29,7 +29,7 @@ int main(int argc, char *argv[]) { } const unsigned int dev_num = options["devicesCount"].as(); - auto devices = dr::shp::get_numa_devices(sycl::default_selector_v); + auto devices = experimental::dr::shp::get_numa_devices(sycl::default_selector_v); if (dev_num > 0) { unsigned int i = 0; @@ -39,7 +39,7 @@ int main(int argc, char *argv[]) { devices.resize(dev_num); // if too many devices } - dr::shp::init(devices); + experimental::dr::shp::init(devices); for (auto &device : devices) { std::cout << " Device: " << device.get_info() diff --git a/test/distributed-ranges/shp/transform.cpp b/test/distributed-ranges/shp/transform.cpp index 43b80ae3cd7..ea919562419 100644 --- a/test/distributed-ranges/shp/transform.cpp +++ b/test/distributed-ranges/shp/transform.cpp @@ -6,7 +6,7 @@ template class TransformTest : public testing::Test { public: using DistVec = - dr::shp::distributed_vector; + experimental::dr::shp::distributed_vector; using LocalVec = std::vector; constexpr static const auto add_10_func = [](auto x) { return x + 10; }; }; @@ -16,7 +16,7 @@ TYPED_TEST_SUITE(TransformTest, AllocatorTypes); TYPED_TEST(TransformTest, whole_aligned) { const typename TestFixture::DistVec a = {0, 1, 2, 3, 4}; typename TestFixture::DistVec b = {9, 9, 9, 9, 9}; - auto r = dr::shp::transform(dr::shp::par_unseq, a, rng::begin(b), + auto r = experimental::dr::shp::transform(experimental::dr::shp::par_unseq, a, rng::begin(b), TestFixture::add_10_func); EXPECT_EQ(r.in, a.end()); EXPECT_EQ(r.out, b.end()); @@ -29,7 +29,7 @@ TYPED_TEST(TransformTest, whole_non_aligned) { typename TestFixture::DistVec b = {50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60}; - auto r = dr::shp::transform(dr::shp::par_unseq, a, rng::begin(b), + auto r = experimental::dr::shp::transform(experimental::dr::shp::par_unseq, a, rng::begin(b), TestFixture::add_10_func); EXPECT_EQ(r.in, a.end()); EXPECT_EQ(*r.out, 55); @@ -42,8 +42,8 @@ TYPED_TEST(TransformTest, part_aligned) { const typename TestFixture::DistVec a = {0, 1, 2, 3, 4}; typename TestFixture::DistVec b = {9, 9, 9, 9, 9}; - auto [r_in, r_out] = dr::shp::transform( - dr::shp::par_unseq, rng::subrange(++rng::begin(a), --rng::end(a)), + auto [r_in, r_out] = experimental::dr::shp::transform( + experimental::dr::shp::par_unseq, rng::subrange(++rng::begin(a), --rng::end(a)), ++rng::begin(b), TestFixture::add_10_func); EXPECT_EQ(*r_in, 4); EXPECT_EQ(*r_out, 9); @@ -55,8 +55,8 @@ TYPED_TEST(TransformTest, part_not_aligned) { const typename TestFixture::DistVec a = {0, 1, 2, 3}; typename TestFixture::DistVec b = {9, 9, 9, 9, 9, 9, 9, 9, 9}; - auto [r_in, r_out] = dr::shp::transform( - dr::shp::par_unseq, rng::subrange(++rng::begin(a), rng::end(a)), + auto [r_in, r_out] = experimental::dr::shp::transform( + experimental::dr::shp::par_unseq, rng::subrange(++rng::begin(a), rng::end(a)), rng::begin(b) + 5, TestFixture::add_10_func); EXPECT_EQ(r_in, a.end()); EXPECT_EQ(r_out, rng::begin(b) + 8); // initial shift in b + subrange size @@ -67,7 +67,7 @@ TYPED_TEST(TransformTest, part_not_aligned) { TYPED_TEST(TransformTest, inplace_whole) { typename TestFixture::DistVec a = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - auto [r_in, r_out] = dr::shp::transform(dr::shp::par_unseq, a, rng::begin(a), + auto [r_in, r_out] = experimental::dr::shp::transform(experimental::dr::shp::par_unseq, a, rng::begin(a), TestFixture::add_10_func); EXPECT_EQ(r_in, rng::end(a)); EXPECT_EQ(r_out, rng::end(a)); @@ -77,8 +77,8 @@ TYPED_TEST(TransformTest, inplace_whole) { TYPED_TEST(TransformTest, inplace_part) { typename TestFixture::DistVec a = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - auto [r_in, r_out] = dr::shp::transform( - dr::shp::par_unseq, rng::subrange(++rng::begin(a), --rng::end(a)), + auto [r_in, r_out] = experimental::dr::shp::transform( + experimental::dr::shp::par_unseq, rng::subrange(++rng::begin(a), --rng::end(a)), ++rng::begin(a), TestFixture::add_10_func); EXPECT_EQ(*r_in, 8); EXPECT_EQ(r_out, --rng::end(a)); @@ -89,7 +89,7 @@ TYPED_TEST(TransformTest, inplace_part) { TYPED_TEST(TransformTest, large_aligned_whole) { const typename TestFixture::DistVec a(12345, 7); typename TestFixture::DistVec b(12345, 3); - dr::shp::transform(dr::shp::par_unseq, a, rng::begin(b), + experimental::dr::shp::transform(experimental::dr::shp::par_unseq, a, rng::begin(b), TestFixture::add_10_func); EXPECT_EQ(b[0], 17); @@ -110,7 +110,7 @@ TYPED_TEST(TransformTest, large_aligned_whole) { TYPED_TEST(TransformTest, large_aligned_part) { const typename TestFixture::DistVec a(12345, 7); typename TestFixture::DistVec b(12345, 3); - dr::shp::transform(dr::shp::par_unseq, + experimental::dr::shp::transform(experimental::dr::shp::par_unseq, rng::subrange(rng::begin(a) + 1000, rng::begin(a) + 1005), rng::begin(b) + 1000, TestFixture::add_10_func); @@ -127,7 +127,7 @@ TYPED_TEST(TransformTest, large_aligned_part) { TYPED_TEST(TransformTest, large_aligned_part_shifted) { const typename TestFixture::DistVec a(12345, 7); typename TestFixture::DistVec b(12345, 3); - dr::shp::transform(dr::shp::par_unseq, + experimental::dr::shp::transform(experimental::dr::shp::par_unseq, rng::subrange(rng::begin(a) + 1000, rng::begin(a) + 1005), rng::begin(b) + 999, TestFixture::add_10_func); @@ -144,7 +144,7 @@ TYPED_TEST(TransformTest, large_aligned_part_shifted) { TYPED_TEST(TransformTest, large_not_aligned) { const typename TestFixture::DistVec a(10000, 7); typename TestFixture::DistVec b(17000, 3); - dr::shp::transform(dr::shp::par_unseq, + experimental::dr::shp::transform(experimental::dr::shp::par_unseq, rng::subrange(rng::begin(a) + 2000, rng::begin(a) + 9000), rng::begin(b) + 9000, TestFixture::add_10_func); @@ -164,8 +164,8 @@ TYPED_TEST(TransformTest, large_not_aligned) { TYPED_TEST(TransformTest, large_inplace) { typename TestFixture::DistVec a(77000, 7); - auto r = dr::shp::transform( - dr::shp::par_unseq, + auto r = experimental::dr::shp::transform( + experimental::dr::shp::par_unseq, rng::subrange(rng::begin(a) + 22222, rng::begin(a) + 55555), rng::begin(a) + 22222, TestFixture::add_10_func); diff --git a/test/distributed-ranges/shp/xhp-tests.hpp b/test/distributed-ranges/shp/xhp-tests.hpp index 96fa25e52df..10bdb454831 100644 --- a/test/distributed-ranges/shp/xhp-tests.hpp +++ b/test/distributed-ranges/shp/xhp-tests.hpp @@ -17,18 +17,18 @@ const std::size_t comm_rank = 0; const std::size_t comm_size = 1; // Namespace aliases and wrapper functions to make the tests uniform -namespace xhp = dr::shp; +namespace xhp = experimental::dr::shp; inline void barrier() {} inline void fence() {} inline void fence_on(auto &&) {} -using AllocatorTypes = ::testing::Types>; +using AllocatorTypes = ::testing::Types>; template concept compliant_view = rng::forward_range && requires(V &v) { - dr::ranges::segments(v); - dr::ranges::rank(dr::ranges::segments(v)[0]); + experimental::dr::ranges::segments(v); + experimental::dr::ranges::rank(experimental::dr::ranges::segments(v)[0]); }; #include "../include/common-tests.hpp" From d5cfe25e7b1b43dcdd64780eb4b73727b6ae0054 Mon Sep 17 00:00:00 2001 From: Sergey Kopienko Date: Thu, 21 Mar 2024 15:50:01 +0100 Subject: [PATCH 09/29] [oneDPL] Pack `__parallel_for` and etc. calls into `__internal::__except_handler` (#1458) --- include/oneapi/dpl/pstl/algorithm_impl.h | 223 ++++++++++-------- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 4 +- .../hetero/algorithm_ranges_impl_hetero.h | 2 +- .../dpl/pstl/hetero/numeric_impl_hetero.h | 50 ++-- include/oneapi/dpl/pstl/numeric_impl.h | 23 +- 5 files changed, 160 insertions(+), 142 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index d0be4420f8c..1cb2c4f38a6 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -1719,11 +1719,14 @@ __pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomA if (__first == __last) return; - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, - [__first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { - __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), _IsVector{}); - }); + __internal::__except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, + [__first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last) { + __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), + _IsVector{}); + }); + }); } //------------------------------------------------------------------------ @@ -1772,13 +1775,15 @@ __pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ra if (__len == 0) return __d_first; - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __len, __d_first](_RandomAccessIterator1 __inner_first, _RandomAccessIterator1 __inner_last) { - __internal::__brick_reverse_copy(__inner_first, __inner_last, - __d_first + (__len - (__inner_last - __first)), _IsVector{}); - }); - return __d_first + __len; + return __internal::__except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __len, __d_first](_RandomAccessIterator1 __inner_first, _RandomAccessIterator1 __inner_last) { + __internal::__brick_reverse_copy(__inner_first, __inner_last, + __d_first + (__len - (__inner_last - __first)), _IsVector{}); + }); + return __d_first + __len; + }); } //------------------------------------------------------------------------ @@ -1950,29 +1955,31 @@ __pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Ran { using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__first, __last, __middle, __result](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { - __internal::__brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy> __copy{}; - if (__b > __middle) - { - __copy(__b, __e, __result + (__b - __middle), _IsVector{}); - } - else - { - _RandomAccessIterator2 __new_result = __result + ((__last - __middle) + (__b - __first)); - if (__e < __middle) + return __internal::__except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, + [__first, __last, __middle, __result](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { + __internal::__brick_copy<__parallel_tag<_IsVector>, _ExecutionPolicy> __copy{}; + if (__b > __middle) { - __copy(__b, __e, __new_result, _IsVector{}); + __copy(__b, __e, __result + (__b - __middle), _IsVector{}); } else { - __copy(__b, __middle, __new_result, _IsVector{}); - __copy(__middle, __e, __result, _IsVector{}); + _RandomAccessIterator2 __new_result = __result + ((__last - __middle) + (__b - __first)); + if (__e < __middle) + { + __copy(__b, __e, __new_result, _IsVector{}); + } + else + { + __copy(__b, __middle, __new_result, _IsVector{}); + __copy(__middle, __e, __result, _IsVector{}); + } } - } - }); - return __result + (__last - __first); + }); + return __result + (__last - __first); + }); } //------------------------------------------------------------------------ @@ -3025,13 +3032,16 @@ __pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _RandomAcc { using __backend_tag = typename __parallel_tag<_IsVector>::__backend_tag; - __par_backend::__parallel_merge( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, - __comp, - [](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, - _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, - _Compare __comp) { return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, _IsVector{}); }); - return __d_first + (__last1 - __first1) + (__last2 - __first2); + return __internal::__except_handler([&]() { + __par_backend::__parallel_merge( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first, + __comp, + [](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2, + _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, _Compare __comp) { + return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __comp, _IsVector{}); + }); + return __d_first + (__last1 - __first1) + (__last2 - __first2); + }); } //------------------------------------------------------------------------ @@ -3533,29 +3543,33 @@ __pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& _ if (__m1 > __set_algo_cut_off) { //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2) - return __internal::__parallel_set_op( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, - __result, __comp); - }); + return __internal::__except_handler([&]() { + return __internal::__parallel_set_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, + __result, __comp); + }); + }); } const auto __m2 = __last2 - __left_bound_seq_2 + __n1; if (__m2 > __set_algo_cut_off) { //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1) - __result = __internal::__parallel_set_op( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result, - __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, - __result, __comp); - }); - return __result; + return __internal::__except_handler([&]() { + __result = __internal::__parallel_set_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, + __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return ::std::min(__n, __m); }, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, + __result, __comp); + }); + return __result; + }); } // [left_bound_seq_1; last1) and [left_bound_seq_2; last2) - use serial algorithm @@ -3701,13 +3715,15 @@ __pattern_set_symmetric_difference(__parallel_tag<_IsVector> __tag, _ExecutionPo return ::std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp); typedef typename ::std::iterator_traits<_RandomAccessIterator3>::value_type _T; - return __internal::__parallel_set_union_op( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, - [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, - _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { - return oneapi::dpl::__utils::__set_symmetric_difference_construct( - __first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); - }); + return __internal::__except_handler([&]() { + return __internal::__parallel_set_union_op( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp, + [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, + _RandomAccessIterator2 __last2, _T* __result, _Compare __comp) { + return oneapi::dpl::__utils::__set_symmetric_difference_construct( + __first1, __last1, __first2, __last2, __result, __comp, __BrickCopyConstruct<_IsVector>()); + }); + }); } //------------------------------------------------------------------------ @@ -4129,30 +4145,33 @@ __pattern_lexicographical_compare(__parallel_tag<_IsVector> __tag, _ExecutionPol { typedef typename ::std::iterator_traits<_RandomAccessIterator1>::reference _RefType1; typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _RefType2; - --__last1; - --__last2; - auto __n = ::std::min(__last1 - __first1, __last2 - __first2); - auto __result = __internal::__parallel_find( - __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, - [__first1, __first2, &__comp](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { - return __internal::__brick_mismatch( - __i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), - [&__comp](const _RefType1 __x, const _RefType2 __y) { - return !__comp(__x, __y) && !__comp(__y, __x); - }, - _IsVector{}) - .first; - }, - ::std::true_type{}); - if (__result == __last1 && __first2 + (__result - __first1) != __last2) - { // if first sequence shorter than second - return !__comp(*(__first2 + (__result - __first1)), *__result); - } - else - { // if second sequence shorter than first or both have the same number of elements - return __comp(*__result, *(__first2 + (__result - __first1))); - } + return __internal::__except_handler([&]() { + --__last1; + --__last2; + auto __n = ::std::min(__last1 - __first1, __last2 - __first2); + auto __result = __internal::__parallel_find( + __tag, ::std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n, + [__first1, __first2, &__comp](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) { + return __internal::__brick_mismatch( + __i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1), + [&__comp](const _RefType1 __x, const _RefType2 __y) { + return !__comp(__x, __y) && !__comp(__y, __x); + }, + _IsVector{}) + .first; + }, + ::std::true_type{}); + + if (__result == __last1 && __first2 + (__result - __first1) != __last2) + { // if first sequence shorter than second + return !__comp(*(__first2 + (__result - __first1)), *__result); + } + else + { // if second sequence shorter than first or both have the same number of elements + return __comp(*__result, *(__first2 + (__result - __first1))); + } + }); } } @@ -4268,30 +4287,32 @@ __pattern_shift_left(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Rand _DiffType __mid = __size / 2 + __size % 2; _DiffType __size_res = __size - __n; - //1. n >= size/2; there is enough memory to 'total' parallel copying - if (__n >= __mid) - { - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __size, - [__first, __n](_DiffType __i, _DiffType __j) { - __brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( - __first + __i, __first + __j, __first + __i - __n, _IsVector{}); - }); - } - else //2. n < size/2; there is not enough memory to parallel copying; doing parallel copying by n elements - { - //TODO: to consider parallel processing by the 'internal' loop (but we may probably get cache locality issues) - for (auto __k = __n; __k < __size; __k += __n) + return __internal::__except_handler([&]() { + //1. n >= size/2; there is enough memory to 'total' parallel copying + if (__n >= __mid) { - auto __end = ::std::min(__k + __n, __size); - __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __k, __end, + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __n, __size, [__first, __n](_DiffType __i, _DiffType __j) { __brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( __first + __i, __first + __j, __first + __i - __n, _IsVector{}); }); } - } + else //2. n < size/2; there is not enough memory to parallel copying; doing parallel copying by n elements + { + //TODO: to consider parallel processing by the 'internal' loop (but we may probably get cache locality issues) + for (auto __k = __n; __k < __size; __k += __n) + { + auto __end = ::std::min(__k + __n, __size); + __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __k, __end, + [__first, __n](_DiffType __i, _DiffType __j) { + __brick_move<__parallel_tag<_IsVector>, _ExecutionPolicy>{}( + __first + __i, __first + __j, __first + __i - __n, _IsVector{}); + }); + } + } - return __first + __size_res; + return __first + __size_res; + }); } template diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index a630c1cc2dd..4374089568a 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -573,7 +573,7 @@ __pattern_adjacent_find(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _I auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator>(); auto __buf2 = __keep2(__first + 1, __last); - // TODO: in case of confilicting names + // TODO: in case of conflicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() bool result = __par_backend_hetero::__parallel_find_or( _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), @@ -688,7 +688,7 @@ __pattern_equal(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator1 auto __keep2 = oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _Iterator2>(); auto __buf2 = __keep2(__first2, __last2); - // TODO: in case of confilicting names + // TODO: in case of conflicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !__par_backend_hetero::__parallel_find_or( _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h index 25ce34a8680..b9064b204d2 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h @@ -103,7 +103,7 @@ __pattern_equal(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& using _Predicate = oneapi::dpl::unseq_backend::single_match_pred<_ExecutionPolicy, equal_predicate<_Pred>>; - // TODO: in case of confilicting names + // TODO: in case of conflicting names // __par_backend_hetero::make_wrapped_policy<__par_backend_hetero::__or_policy_wrapper>() return !oneapi::dpl::__par_backend_hetero::__parallel_find_or( _BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), _Predicate{equal_predicate<_Pred>{__pred}}, diff --git a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h index 23beae41c10..ff7d9581e42 100644 --- a/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/numeric_impl_hetero.h @@ -243,40 +243,34 @@ __pattern_adjacent_difference(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy& // if we have the only element, just copy it according to the specification if (__n == 1) { - return __internal::__except_handler([__tag, &__exec, __first, __last, __d_first, __d_last, &__op]() { - auto __wrapped_policy = __par_backend_hetero::make_wrapped_policy( - ::std::forward<_ExecutionPolicy>(__exec)); + auto __wrapped_policy = __par_backend_hetero::make_wrapped_policy( + ::std::forward<_ExecutionPolicy>(__exec)); - __internal::__pattern_walk2_brick(__tag, __wrapped_policy, __first, __last, __d_first, - __internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); - - return __d_last; - }); + __internal::__pattern_walk2_brick(__tag, __wrapped_policy, __first, __last, __d_first, + __internal::__brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } else #endif { - return __internal::__except_handler([&__exec, __first, __last, __d_first, __d_last, &__op, __n]() { - auto __fn = [__op](_It1ValueT __in1, _It1ValueT __in2, _It2ValueTRef __out1) { - __out1 = __op(__in2, __in1); // This move assignment is allowed by the C++ standard draft N4810 - }; - - auto __keep1 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); - auto __buf1 = __keep1(__first, __last); - auto __keep2 = - oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator2>(); - auto __buf2 = __keep2(__d_first, __d_last); - - using _Function = unseq_backend::walk_adjacent_difference<_ExecutionPolicy, decltype(__fn)>; - - oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, __exec, _Function{__fn}, __n, - __buf1.all_view(), __buf2.all_view()) - .wait(); - - return __d_last; - }); + auto __fn = [__op](_It1ValueT __in1, _It1ValueT __in2, _It2ValueTRef __out1) { + __out1 = __op(__in2, __in1); // This move assignment is allowed by the C++ standard draft N4810 + }; + + auto __keep1 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::read, _ForwardIterator1>(); + auto __buf1 = __keep1(__first, __last); + auto __keep2 = + oneapi::dpl::__ranges::__get_sycl_range<__par_backend_hetero::access_mode::write, _ForwardIterator2>(); + auto __buf2 = __keep2(__d_first, __d_last); + + using _Function = unseq_backend::walk_adjacent_difference<_ExecutionPolicy, decltype(__fn)>; + + oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, __exec, _Function{__fn}, __n, + __buf1.all_view(), __buf2.all_view()) + .wait(); } + + return __d_last; } } // namespace __internal diff --git a/include/oneapi/dpl/pstl/numeric_impl.h b/include/oneapi/dpl/pstl/numeric_impl.h index b87a02ba428..c479060dd09 100644 --- a/include/oneapi/dpl/pstl/numeric_impl.h +++ b/include/oneapi/dpl/pstl/numeric_impl.h @@ -397,16 +397,19 @@ __pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&& __ex typedef typename ::std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2; *__d_first = *__first; - __par_backend::__parallel_for( - __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, - [&__op, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { - _RandomAccessIterator2 __d_b = __d_first + (__b - __first); - __internal::__brick_walk3( - __b, __e, __b + 1, __d_b + 1, - [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, - _IsVector{}); - }); - return __d_first + (__last - __first); + + return __internal::__except_handler([&]() { + __par_backend::__parallel_for( + __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last - 1, + [&__op, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e) { + _RandomAccessIterator2 __d_b = __d_first + (__b - __first); + __internal::__brick_walk3( + __b, __e, __b + 1, __d_b + 1, + [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__y, __x); }, + _IsVector{}); + }); + return __d_first + (__last - __first); + }); } } // namespace __internal From 477b204f2ce29d473561e7182fe38e5224360f0c Mon Sep 17 00:00:00 2001 From: "Mateusz P. Nowak" Date: Thu, 21 Mar 2024 14:57:55 +0000 Subject: [PATCH 10/29] update --- .../detail/enumerate.hpp | 8 ------ .../distributed_ranges_impl/detail/index.hpp | 8 ------ .../detail/iterator_adaptor.hpp | 8 ------ .../distributed_ranges_impl/detail/logger.hpp | 8 ------ .../detail/onedpl_direct_iterator.hpp | 8 ------ .../detail/owning_view.hpp | 8 ------ .../detail/remote_subrange.hpp | 8 ------ .../detail/segments_tools.hpp | 8 ------ .../detail/sycl_utils.hpp | 16 ----------- .../distributed_ranges_impl/detail/utils.hpp | 8 ------ .../detail/view_detectors.hpp | 8 ------ .../shp/algorithms/execution_policy.hpp | 8 ------ .../shp/algorithms/fill.hpp | 8 ------ .../shp/algorithms/for_each.hpp | 8 ------ .../shp/algorithms/iota.hpp | 8 ------ .../shp/algorithms/matrix/gemm.hpp | 8 ------ .../shp/algorithms/matrix/gemv.hpp | 8 ------ .../shp/algorithms/matrix/local_gemm.hpp | 8 ------ .../shp/algorithms/matrix/local_gemv.hpp | 8 ------ .../shp/algorithms/transform.hpp | 8 ------ .../shp/allocators.hpp | 8 ------ .../shp/containers/detail.hpp | 8 ------ .../containers/distributed_dense_matrix.hpp | 8 ------ .../shp/containers/duplicated_vector.hpp | 8 ------ .../shp/containers/matrix_entry.hpp | 16 ----------- .../shp/containers/matrix_partition.hpp | 14 ---------- .../containers/sequential/dense_matrix.hpp | 8 ------ .../shp/containers/sparse_matrix.hpp | 8 ------ .../distributed_ranges_impl/shp/detail.hpp | 8 ------ .../shp/device_ptr.hpp | 8 ------ .../shp/device_ref.hpp | 8 ------ .../shp/device_span.hpp | 8 ------ .../shp/device_vector.hpp | 8 ------ .../shp/distributed_span.hpp | 8 ------ .../shp/distributed_vector.hpp | 8 ------ .../distributed_ranges_impl/shp/future.hpp | 8 ------ .../distributed_ranges_impl/shp/range.hpp | 27 ------------------- .../shp/range_adaptors.hpp | 8 ------ .../distributed_ranges_impl/shp/util.hpp | 8 ------ .../shp/util/coo_matrix.hpp | 8 ------ .../shp/util/generate_random.hpp | 8 ------ .../shp/util/matrix_io.hpp | 8 ------ .../distributed_ranges_impl/shp/vector.hpp | 8 ------ .../shp/views/csr_matrix_view.hpp | 8 ------ .../shp/views/dense_column_view.hpp | 8 ------ .../shp/views/dense_matrix_iterator.hpp | 8 ------ .../shp/views/dense_matrix_view.hpp | 8 ------ .../shp/views/dense_row_view.hpp | 8 ------ .../shp/views/enumerate.hpp | 8 ------ .../shp/views/standard_views.hpp | 8 ------ .../shp/views/views.hpp | 8 ------ .../distributed_ranges_impl/shp/zip_view.hpp | 14 ---------- .../distributed_ranges_impl/views/iota.hpp | 8 ------ .../views/transform.hpp | 8 ------ .../distributed_ranges_impl/views/views.hpp | 8 ------ test/distributed-ranges/shp/CMakeLists.txt | 2 +- 56 files changed, 1 insertion(+), 488 deletions(-) diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp index 5464cb05f7c..a9fd556a1c5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/enumerate.hpp @@ -6,11 +6,7 @@ #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace __detail { @@ -63,8 +59,4 @@ inline constexpr auto enumerate = enumerate_fn_{}; } // namespace __detail -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp index 6e5a3353449..a9d072c35c2 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/index.hpp @@ -9,11 +9,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace { template @@ -100,11 +96,7 @@ template class index { index_type second; }; -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace std { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp index a60ba676921..5cdc1eb36a5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/iterator_adaptor.hpp @@ -9,11 +9,7 @@ #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace { @@ -201,8 +197,4 @@ template class iterator_adaptor { accessor_type accessor_; }; -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp index 3d13443f365..2b342936425 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp @@ -13,11 +13,7 @@ #include "format_shim.hpp" #include "ranges_shim.hpp" -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 class timer { public: @@ -115,8 +111,4 @@ inline logger drlog; #define DRLOG(...) \ experimental::dr::drlog.debug(nostd::source_location::current(), __VA_ARGS__) -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp index 50e5d671d91..ec5892ecb7e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/onedpl_direct_iterator.hpp @@ -8,11 +8,7 @@ #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace __detail { @@ -112,8 +108,4 @@ template class direct_iterator { } // namespace __detail -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp index b9b3d0a7970..43074a1f966 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/owning_view.hpp @@ -6,11 +6,7 @@ #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace __detail { @@ -70,8 +66,4 @@ class owning_view : public rng::view_interface> { } // namespace __detail -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp index 4395b2d313a..7b268ef720b 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/remote_subrange.hpp @@ -9,11 +9,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class remote_subrange : public rng::subrange { @@ -47,11 +43,7 @@ remote_subrange(R &&, std::size_t) -> remote_subrange>; template remote_subrange(R &&) -> remote_subrange>; -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 #if !defined(DR_SPEC) diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp index 7310cd02f5c..73c3bfdb2f4 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/segments_tools.hpp @@ -10,11 +10,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace __detail { @@ -95,11 +91,7 @@ template auto drop_segments(R &&segments, std::size_t n) { } // namespace __detail -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace DR_RANGES_NAMESPACE { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp index 98d12728cfd..177c785b09e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/sycl_utils.hpp @@ -12,11 +12,7 @@ #include -<<<<<<< HEAD namespace experimental::dr::__detail { -======= -namespace experimental::__detail { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 // With the ND-range workaround, the maximum kernel size is // `std::numeric_limits::max()` rounded down to @@ -195,29 +191,17 @@ sycl::event parallel_for(sycl::queue &q, sycl::range<3> global, Fn &&fn) { using event = sycl::event; -<<<<<<< HEAD } // namespace experimental::dr::__detail #else namespace experimental::dr::__detail { -======= -} // namespace experimental::__detail - -#else - -namespace experimental::__detail { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 class event { public: void wait() {} }; -<<<<<<< HEAD } // namespace experimental::dr::__detail -======= -} // namespace experimental::__detail ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 #endif // SYCL_LANGUAGE_VERSION diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp index 6a813ee79a1..357bdfd8665 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/utils.hpp @@ -4,11 +4,7 @@ #pragma once -<<<<<<< HEAD namespace experimental::dr::__detail { -======= -namespace experimental::__detail { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 inline std::size_t round_up(std::size_t n, std::size_t multiple) { if (multiple == 0) { @@ -31,8 +27,4 @@ inline std::size_t partition_up(std::size_t n, std::size_t multiple) { return round_up(n, multiple) / multiple; } -<<<<<<< HEAD } // namespace experimental::dr::__detail -======= -} // namespace experimental::__detail ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp index 633f635681c..4b39ab5adda 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/view_detectors.hpp @@ -6,11 +6,7 @@ #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template struct is_ref_view : std::false_type {}; template @@ -60,8 +56,4 @@ struct is_zip_view> : std::true_type {}; template inline constexpr bool is_zip_view_v = is_zip_view::value; -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp index 09e01eb34a1..33de94953f7 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/execution_policy.hpp @@ -8,11 +8,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 struct device_policy { device_policy(sycl::device device) : devices_({device}) {} @@ -35,8 +31,4 @@ struct device_policy { std::vector devices_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp index eb8c4867a7b..fe287091d38 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/fill.hpp @@ -15,11 +15,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template requires(!std::is_const_v> && @@ -95,8 +91,4 @@ auto fill(Iter first, Iter last, const T &value) { return last; } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp index dfb8f94ba34..1135740f933 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/for_each.hpp @@ -13,11 +13,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template void for_each(ExecutionPolicy &&policy, R &&r, Fn &&fn) { @@ -58,8 +54,4 @@ void for_each(Iter begin, Iter end, Fn &&fn) { for_each(experimental::dr::shp::par_unseq, begin, end, std::forward(fn)); } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp index d66879b7cd2..01ebc163ce6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/iota.hpp @@ -11,11 +11,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template void iota(R &&r, T value) { auto iota_view = rng::views::iota(value, T(value + rng::distance(r))); @@ -32,8 +28,4 @@ void iota(Iter begin, Iter end, T value) { iota(r, value); } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp index ebac8c18521..21f5a803e72 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp @@ -7,11 +7,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template void gemm(distributed_dense_matrix &a, distributed_dense_matrix &b, @@ -246,8 +242,4 @@ void gemm_buffered_async(distributed_dense_matrix &a, } } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp index f56ac2efb13..f90aa1a0e41 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp @@ -14,11 +14,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template @@ -209,8 +205,4 @@ void gemv_square_copy(C &&c, experimental::dr::shp::sparse_matrix &a, B && __detail::wait(events); } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp index 55516a8a4d3..f124d2f2231 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp @@ -10,11 +10,7 @@ #include #endif -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace __detail { @@ -85,8 +81,4 @@ auto local_gemm(sycl::queue &q, shp::dense_matrix_view a, } // namespace __detail -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp index 74ef503bc17..a6e0d4cf59e 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp @@ -12,11 +12,7 @@ #include #endif -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace __detail { @@ -101,8 +97,4 @@ auto local_gemv(sycl::queue &q, csr_matrix_view a, Iter b, } // namespace __detail -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp index 6cf6cbcf594..104c0e761a1 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/transform.hpp @@ -7,11 +7,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 /** * Applies the given function to a range and stores the result in another range, @@ -95,8 +91,4 @@ auto transform(Iter1 in_begin, Iter1 in_end, Iter2 out_end, Fn &&fn) { std::forward(fn)); } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp index 994cd6fe368..b54539b7955 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/allocators.hpp @@ -10,11 +10,7 @@ #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template using shared_allocator = sycl::usm_allocator; @@ -128,8 +124,4 @@ template class buffered_allocator { std::shared_ptr> buffers_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp index f35b4155c0d..88829221b69 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/detail.hpp @@ -6,11 +6,7 @@ #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace detail { @@ -29,8 +25,4 @@ inline std::tuple factor(std::size_t n) { } // namespace detail -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp index 205d37e31db..d77caded1ff 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp @@ -15,11 +15,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class distributed_dense_matrix_accessor { public: @@ -333,8 +329,4 @@ template class distributed_dense_matrix { std::vector>> tiles_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp index ba791dfa53b..3cf785e0c76 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/duplicated_vector.hpp @@ -7,11 +7,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template > class duplicated_vector { @@ -49,8 +45,4 @@ class duplicated_vector { std::size_t size_ = 0; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp index 3191a8b21f8..538dd09c172 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp @@ -10,11 +10,7 @@ #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class matrix_entry { public: @@ -89,11 +85,7 @@ template class matrix_entry { map_type value_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace std { @@ -115,11 +107,7 @@ struct tuple_size> : integral_constant } // namespace std -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class matrix_ref { @@ -196,11 +184,7 @@ class matrix_ref { scalar_reference value_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace std { diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp index b38e9bd42d0..fa38ce43b1c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp @@ -8,11 +8,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace tile { @@ -89,15 +85,9 @@ class block_cyclic final : public matrix_partition { return grid; } -<<<<<<< HEAD experimental::dr::index<> tile_shape_; experimental::dr::index<> grid_shape_; }; // namespace experimental::dr::shp -======= - dr::index<> tile_shape_; - dr::index<> grid_shape_; -}; // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 inline std::vector partition_matmul(std::size_t m, std::size_t n, std::size_t k) { @@ -122,8 +112,4 @@ inline std::vector partition_matmul(std::size_t m, std::size_t n, return {a_block, b_block, c_block}; } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp index 30652b7e147..1a08e3ecbd3 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp @@ -13,11 +13,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template > class dense_matrix { @@ -144,8 +140,4 @@ class dense_matrix { size_type ld_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp index efc9d495b18..a96f4555256 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp @@ -15,11 +15,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template requires(rng::viewable_range) @@ -414,8 +410,4 @@ template class sparse_matrix { std::vector segments_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp index cbc3eb84d0a..7ba49cb2546 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/detail.hpp @@ -11,11 +11,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace __detail { @@ -91,8 +87,4 @@ inline void wait(const std::vector &events) { } // namespace __detail -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp index 55715c7c454..a9d9a7a1dd0 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ptr.hpp @@ -10,11 +10,7 @@ #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template requires(std::is_trivially_copyable_v || std::is_void_v) @@ -145,8 +141,4 @@ class device_ptr { T *pointer_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp index 0ea6009cfbb..5cbf13f9544 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_ref.hpp @@ -8,11 +8,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template requires(std::is_trivially_copyable_v || std::is_void_v) @@ -61,8 +57,4 @@ class device_ref { T *pointer_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp index 8775461182a..89147476bee 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_span.hpp @@ -8,11 +8,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 // A `device_span` is simply a normal `std::span` that's // been decorated with an extra `rank()` function, showing @@ -98,8 +94,4 @@ template device_span(R &&, std::size_t) -> device_span, rng::iterator_t>; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp index dd396b2a0f8..b9f1ea4ff42 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/device_vector.hpp @@ -7,11 +7,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class device_vector : public experimental::dr::shp::vector { @@ -38,8 +34,4 @@ template device_vector(std::size_t, const Alloc, std::size_t) -> device_vector; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp index 279229c381d..59992298f6f 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_span.hpp @@ -12,11 +12,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class distributed_span_accessor { public: @@ -258,8 +254,4 @@ distributed_span(R &&r) -> distributed_span< rng::range_value_t, rng::iterator_t>>; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp index 58f0266910e..1175f7ce6d5 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/distributed_vector.hpp @@ -14,11 +14,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class distributed_vector_accessor { public: @@ -218,8 +214,4 @@ struct distributed_vector { std::size_t segment_size_ = 0; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp index a2b648f0fa8..7678e479f07 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/future.hpp @@ -9,11 +9,7 @@ #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class future { public: @@ -49,8 +45,4 @@ template class future { std::vector events_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp index 6233c5da422..f12180c6db1 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range.hpp @@ -7,11 +7,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class id { public: @@ -133,28 +129,5 @@ template class segment_range { std::size_t global_offset_; }; -/* -template auto distributed_iota_view(R &&r) { - static_assert(experimental::dr::distributed_contiguous_range); - if constexpr (experimental::dr::distributed_contiguous_range) { - std::vector> iota_segments; - std::size_t global_offset = 0; - std::size_t segment_id = 0; - for (auto &&segment : r.segments()) { - iota_segments.push_back( - segment_range(segment_id, segment.size(), global_offset)); - global_offset += segment.size(); - segment_id++; - } - return experimental::dr::shp::distributed_span(iota_segments); - } else { - return segment_range(0, rng::size(r), 0); - } -} -*/ -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp index 1c09d8bd039..19864776395 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/range_adaptors.hpp @@ -7,19 +7,11 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template auto enumerate(R &&r) { auto i = rng::views::iota(uint32_t(0), uint32_t(rng::size(r))); return experimental::dr::shp::zip_view(i, r); } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp index 56d191c0c8e..ffbcb323df6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp @@ -7,11 +7,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template sycl::device select_device(Selector &&selector) { sycl::device d; @@ -247,8 +243,4 @@ concept sycl_device_selector = requires(T &t, const sycl::device &device) { } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp index 03e5b90d5b9..24f0dc822f6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp @@ -8,11 +8,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace __detail { @@ -171,8 +167,4 @@ class coo_matrix { } // namespace __detail -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp index 409425b7293..e3ebeb099ed 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp @@ -9,11 +9,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace { @@ -93,8 +89,4 @@ auto generate_random_csr(experimental::dr::index shape, double density = 0.01 return csr_matrix_view(values, rowptr, colind, shape, nnz, 0); } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp index e9abcb7f56b..14c1e24c6e1 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp @@ -15,11 +15,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace __detail { @@ -290,8 +286,4 @@ auto mmread(std::string file_path, bool one_indexed = true) { one_indexed); } -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp index fdceeec3b50..3a4b35cb7ae 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/vector.hpp @@ -6,11 +6,7 @@ #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 // TODO: deal properly with non-trivially destructible types // - constructors, destructors, assign @@ -249,8 +245,4 @@ template > class vector { allocator_type allocator_; }; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp index b45e75f2578..7d8f1813cd2 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp @@ -8,11 +8,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class csr_matrix_view_accessor { @@ -226,8 +222,4 @@ csr_matrix_view(TIter, IIter, IIter, Args &&...) -> csr_matrix_view, std::iter_value_t, TIter, IIter>; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp index 62c9a710886..4543af1dd04 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp @@ -8,11 +8,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class dense_matrix_column_accessor { public: using size_type = std::size_t; @@ -112,8 +108,4 @@ template dense_matrix_column_view(Iter, std::size_t, std::size_t, std::size_t) -> dense_matrix_column_view, Iter>; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp index 18d3743e9c2..b2674679b20 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp @@ -12,11 +12,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class dense_matrix_accessor { public: @@ -110,8 +106,4 @@ using dense_matrix_iterator = template using dense_matrix_view_iterator = dense_matrix_iterator; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp index f78c2a48dab..4ad164708b7 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp @@ -14,11 +14,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class dense_matrix_view @@ -125,8 +121,4 @@ template dense_matrix_view(dense_matrix &) -> dense_matrix_view::pointer>; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp index 4c329b8a412..d88e0b3682c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp @@ -9,11 +9,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class dense_matrix_row_accessor { public: using size_type = std::size_t; @@ -110,8 +106,4 @@ template dense_matrix_row_view(Iter, std::size_t, std::size_t) -> dense_matrix_row_view, Iter>; -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp index 6b2f93f2901..2818d82cd4c 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/enumerate.hpp @@ -6,11 +6,7 @@ #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace views { @@ -57,8 +53,4 @@ inline constexpr auto enumerate = enumerate_fn_{}; } // namespace views -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp index c81d00eb9e3..eb503a24961 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp @@ -11,11 +11,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp { -======= -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace views { @@ -48,8 +44,4 @@ inline auto slice(experimental::dr::index<> slice_indices) { } // namespace views -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp index 5c9cd87ca5b..3b763c6a897 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/views.hpp @@ -9,11 +9,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr::shp::views { -======= -namespace experimental::shp::views { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 inline constexpr auto all = rng::views::all; @@ -27,8 +23,4 @@ inline constexpr auto take = rng::views::take; inline constexpr auto transform = experimental::dr::views::transform; -<<<<<<< HEAD } // namespace experimental::dr::shp::views -======= -} // namespace experimental::shp::views ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp index 8c0d893df82..ff556646000 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/zip_view.hpp @@ -12,11 +12,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template struct is_owning_view : std::false_type {}; // template @@ -25,15 +21,9 @@ template struct is_owning_view : std::false_type {}; template inline constexpr bool is_owning_view_v = is_owning_view{}; -<<<<<<< HEAD }; // namespace experimental::dr namespace experimental::dr::shp { -======= -}; // namespace experimental - -namespace experimental::shp { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 namespace __detail { @@ -356,8 +346,4 @@ template auto zip(Rs &&...rs) { } // namespace views -<<<<<<< HEAD } // namespace experimental::dr::shp -======= -} // namespace experimental::shp ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp index 9f75479eb0d..82a227ef445 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/iota.hpp @@ -4,11 +4,7 @@ #pragma once -<<<<<<< HEAD namespace experimental::dr::views { -======= -namespace experimental::views { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 // // range-v3 iota uses sentinels that are not the same type as the @@ -28,8 +24,4 @@ struct iota_fn_ { inline constexpr auto iota = iota_fn_{}; -<<<<<<< HEAD } // namespace experimental::dr::views -======= -} // namespace experimental::views ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp index 08275cf7ae4..d8a3a23bfc9 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/transform.hpp @@ -11,11 +11,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 template class transform_iterator { @@ -199,11 +195,7 @@ class transform_fn_ { inline constexpr auto transform = transform_fn_{}; } // namespace views -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 #if !defined(DR_SPEC) diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp index 3a5869d00d9..6ba6ef64806 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/views/views.hpp @@ -7,11 +7,7 @@ #include #include -<<<<<<< HEAD namespace experimental::dr { -======= -namespace experimental { ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 // returns range: [(rank, element) ...] auto ranked_view(const experimental::dr::distributed_range auto &r) { @@ -19,8 +15,4 @@ auto ranked_view(const experimental::dr::distributed_range auto &r) { return rng::views::zip(rng::views::transform(r, rank), r); } -<<<<<<< HEAD } // namespace experimental::dr -======= -} // namespace experimental ->>>>>>> cd565891f4ffdd0b4641810a38c60c683e5f1fe0 diff --git a/test/distributed-ranges/shp/CMakeLists.txt b/test/distributed-ranges/shp/CMakeLists.txt index 93ba0dcca4c..0325a4dd7f6 100644 --- a/test/distributed-ranges/shp/CMakeLists.txt +++ b/test/distributed-ranges/shp/CMakeLists.txt @@ -32,7 +32,7 @@ find_package(MKL REQUIRED) # include(GoogleTest) add_library(dr_shp INTERFACE) -add_library(experimental::dr::shp ALIAS dr_shp) +add_library(DR::shp ALIAS dr_shp) target_include_directories(dr_shp INTERFACE . vendor ../../../include) target_compile_definitions(dr_shp INTERFACE USE_MKL From 91c3a9ad498dc9c12ce7827b6e3e66a2908a88dc Mon Sep 17 00:00:00 2001 From: Timmie Smith Date: Fri, 22 Mar 2024 08:33:21 -0500 Subject: [PATCH 11/29] Removing noexcept to resolve issue reported by static analysis. (#1462) --------- Co-authored-by: Sergey Kopienko <> --- include/oneapi/dpl/pstl/algorithm_fwd.h | 2 +- include/oneapi/dpl/pstl/algorithm_impl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_fwd.h b/include/oneapi/dpl/pstl/algorithm_fwd.h index 5af00e5425c..556490c8d19 100644 --- a/include/oneapi/dpl/pstl/algorithm_fwd.h +++ b/include/oneapi/dpl/pstl/algorithm_fwd.h @@ -102,7 +102,7 @@ _RandomAccessIterator __brick_walk1_n(_RandomAccessIterator, _DifferenceType, _F template _ForwardIterator -__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Function) noexcept; +__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Function); template _RandomAccessIterator diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index 1cb2c4f38a6..7f9db008b45 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -219,7 +219,7 @@ __brick_walk1_n(_RandomAccessIterator __first, _DifferenceType __n, _Function __ template _ForwardIterator -__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f) noexcept +__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f) { static_assert(__is_serial_tag_v<_Tag> || __is_parallel_forward_tag_v<_Tag>); From 015243773655a9dce1df0eb6a69e26442ed053a8 Mon Sep 17 00:00:00 2001 From: "Mateusz P. Nowak" Date: Mon, 25 Mar 2024 14:32:39 +0000 Subject: [PATCH 12/29] CI updated for distributed-ranges --- .github/workflows/ci.yml | 6 ++++++ test/CMakeLists.txt | 18 ++++++++++++++++-- test/distributed-ranges/shp/CMakeLists.txt | 5 ----- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29e9111dc72..35a3f1ea5e6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -172,6 +172,12 @@ jobs: build_type: release backend: serial device_type: HOST + - os: ubuntu-20.04 + cxx_compiler: icpx + std: 20 + build_type: release + backend: dpcpp + device_type: HOST steps: - uses: actions/checkout@v3 - name: Set up Intel APT repository diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 91adf973d02..8893497fc99 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -12,7 +12,10 @@ # ##===----------------------------------------------------------------------===## add_subdirectory(kt) -add_subdirectory(distributed-ranges/shp) + +if (ONEDPL_USE_DR) + add_subdirectory(distributed-ranges/shp) +endif() # rng_tests set (ranlux_24_48_test.pass_timeout_debug "900") # 15min set (ranlux_24_48_test.pass_timeout_release "720") # 12min @@ -66,10 +69,16 @@ endif() add_custom_target(build-onedpl-tests COMMENT "Build all oneDPL tests") +if (ONEDPL_USE_DR) + set(run-onedpl-tests-depends build-onedpl-tests shp-tests shp-tests-3) +else() + set(run-onedpl-tests-depends build-onedpl-tests) +endif() + add_custom_target(run-onedpl-tests COMMAND "${CMAKE_CTEST_COMMAND}" --output-on-failure USES_TERMINAL - DEPENDS build-onedpl-tests + DEPENDS ${run-onedpl-tests-depends} COMMENT "Build and run all oneDPL tests") macro(onedpl_construct_exec test_source_file _test_name switch_off_checked_iterators custom_define) @@ -249,3 +258,8 @@ if (TARGET interop_allocs.pass) target_sources(interop_allocs.pass PRIVATE "${CMAKE_CURRENT_LIST_DIR}/pstl_offload/memory/interop_allocs_system.cpp" "${CMAKE_CURRENT_LIST_DIR}/pstl_offload/memory/interop_allocs_usm.cpp") endif() + +if (ONEDPL_USE_DR) + add_custom_target(build-dr-tests COMMENT "Build dr tests" DEPENDS shp-tests shp-tests-3) + add_custom_target(run-dr-tests COMMENT "Run dr tests" DEPENDS build-dr-tests COMMAND ./distributed-ranges/shp/shp-tests ./distributed-ranges/shp/shp-tests-3) +endif() \ No newline at end of file diff --git a/test/distributed-ranges/shp/CMakeLists.txt b/test/distributed-ranges/shp/CMakeLists.txt index 0325a4dd7f6..9d32775bf21 100644 --- a/test/distributed-ranges/shp/CMakeLists.txt +++ b/test/distributed-ranges/shp/CMakeLists.txt @@ -69,11 +69,6 @@ add_executable( add_executable(shp-tests-3 shp-tests.cpp containers-3.cpp copy-3.cpp) -# skeleton for rapid builds of individual tests, feel free to change this -# add_executable(shp-quick-test shp-tests.cpp ../common/transform.cpp) -# target_compile_definitions(shp-quick-test PRIVATE QUICK_TEST) - -#foreach(test-exec IN ITEMS shp-tests shp-tests-3 shp-quick-test) foreach(test-exec IN ITEMS shp-tests shp-tests-3) target_link_libraries(${test-exec} GTest::gtest_main DR::shp fmt::fmt cxxopts) endforeach() From f489ff400ed8ba16eafce224ca6ad8f5bbbb4475 Mon Sep 17 00:00:00 2001 From: Anuya Welling Date: Wed, 27 Mar 2024 11:27:20 -0500 Subject: [PATCH 13/29] [Dynamic Selection] Skipping auto_tune and dynamic load tests when device=FPGA_EMU (#1440) Skipping tests with default device as FPGA or FPGA_EMU --- .../dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp | 2 ++ .../sycl/test_dynamic_load_policy_sycl.pass.cpp | 2 ++ test/support/utils_sycl.h | 2 +- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp b/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp index f5294f2e382..e24c4c00522 100644 --- a/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp +++ b/test/parallel_api/dynamic_selection/sycl/test_auto_tune_policy_sycl.pass.cpp @@ -492,6 +492,7 @@ main() bool bProcessed = false; #if TEST_DYNAMIC_SELECTION_AVAILABLE +#if !ONEDPL_FPGA_DEVICE || !ONEDPL_FPGA_EMULATOR using policy_t = oneapi::dpl::experimental::auto_tune_policy; std::vector u; build_auto_tune_universe(u); @@ -542,6 +543,7 @@ main() bProcessed = true; } +#endif // Devices available are CPU and GPU #endif // TEST_DYNAMIC_SELECTION_AVAILABLE return TestUtils::done(bProcessed); diff --git a/test/parallel_api/dynamic_selection/sycl/test_dynamic_load_policy_sycl.pass.cpp b/test/parallel_api/dynamic_selection/sycl/test_dynamic_load_policy_sycl.pass.cpp index 17d6de6aa4f..b473892af19 100644 --- a/test/parallel_api/dynamic_selection/sycl/test_dynamic_load_policy_sycl.pass.cpp +++ b/test/parallel_api/dynamic_selection/sycl/test_dynamic_load_policy_sycl.pass.cpp @@ -47,6 +47,7 @@ main() bool bProcessed = false; #if TEST_DYNAMIC_SELECTION_AVAILABLE +#if !ONEDPL_FPGA_DEVICE || !ONEDPL_FPGA_EMULATOR using policy_t = oneapi::dpl::experimental::dynamic_load_policy; std::vector u; build_dl_universe(u); @@ -76,6 +77,7 @@ main() bProcessed = true; } +#endif // Devices available are CPU and GPU #endif // TEST_DYNAMIC_SELECTION_AVAILABLE return TestUtils::done(bProcessed); diff --git a/test/support/utils_sycl.h b/test/support/utils_sycl.h index d88eee142fa..b2f351d716b 100644 --- a/test/support/utils_sycl.h +++ b/test/support/utils_sycl.h @@ -106,7 +106,7 @@ make_new_policy(_Policy&& __policy) #if ONEDPL_FPGA_DEVICE inline auto default_selector = # if ONEDPL_FPGA_EMULATOR - sycl::ext::intel::fpga_emulator_selector{}; + sycl::ext::intel::fpga_emulator_selector_v; # else sycl::ext::intel::fpga_selector{}; # endif // ONEDPL_FPGA_EMULATOR From 5bac408e8305d2f179bc19554bc217e78c5c0e23 Mon Sep 17 00:00:00 2001 From: Dmitriy Sobolev Date: Tue, 2 Apr 2024 07:53:01 -0500 Subject: [PATCH 14/29] [CI] Update DPC++ and oneTBB on Windows (#1473) --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29e9111dc72..722527177f5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,8 +15,8 @@ env: BUILD_CONCURRENCY: 2 MACOS_BUILD_CONCURRENCY: 3 TEST_TIMEOUT: 360 - WINDOWS_TBB_DOWNLOAD_LINK: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/64957c0f-37bf-4408-909c-37ff52fe5119/w_tbb_oneapi_p_2021.11.0.49526.exe - WINDOWS_ICPX_DOWNLOAD_LINK: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/94e15cb5-4bcc-4fdd-91cf-0f819a54e42e/w_dpcpp-cpp-compiler_p_2024.0.2.28_offline.exe + WINDOWS_TBB_DOWNLOAD_LINK: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/c0b87e5c-1e1f-431f-b26e-dc250032e586/w_tbb_oneapi_p_2021.12.0.500_offline.exe + WINDOWS_ICPX_DOWNLOAD_LINK: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/a1d6c917-05ab-4883-b67b-4bd60abb74e5/w_dpcpp-cpp-compiler_p_2024.1.0.469_offline.exe WINDOWS_ONEAPI_PATH: C:\Program Files (x86)\Intel\oneAPI LINUX_ONEAPI_PATH: /opt/intel/oneapi # TODO: get rid of a deprecated configuration: Intel® C++ Compiler Classic From 6a45be7078a636676b6a128a142e5d02213722ca Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger <109972525+danhoeflinger@users.noreply.github.com> Date: Thu, 4 Apr 2024 11:48:10 -0400 Subject: [PATCH 15/29] Adding std::vector::iterator to is_passed_directly types (#1438) Changes USM allocator allocated std::vectors from being processed like host-side iterators to being "passed directly" (into sycl kernels) when it is possible to detect them. The C++ standard library implementation may or may not include allocator information in the iterator type. If it does, we can detect such iterators, and treat them as "passed directly". Where it is not possible, they are still treated as host-side iterators and wrapped in a sycl::buffer. Signed-off-by: Dan Hoeflinger --- .../dpl/pstl/hetero/dpcpp/sycl_iterator.h | 28 +++++++++++++++++++ .../dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h | 11 ++++++++ 2 files changed, 39 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h index 05047f61d77..27bfb0d4a88 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_iterator.h @@ -149,6 +149,34 @@ struct _ModeConverter static constexpr access_mode __value = access_mode::discard_write; }; +template ::value_type> +using __default_alloc_vec_iter = typename std::vector::iterator; + +template ::value_type> +using __usm_shared_alloc_vec_iter = + typename std::vector>::iterator; + +template ::value_type> +using __usm_host_alloc_vec_iter = + typename std::vector>::iterator; + +// Evaluates to true if the provided type is an iterator with a value_type and if the implementation of a +// std::vector::iterator can be distinguished between three different allocators, the +// default, usm_shared, and usm_host. If all are distinct, it is very unlikely any non-usm based allocator +// could be confused with a usm allocator. +template +struct __vector_iter_distinguishes_by_allocator : std::false_type +{ +}; +template +struct __vector_iter_distinguishes_by_allocator< + Iter, std::enable_if_t, __usm_shared_alloc_vec_iter> && + !std::is_same_v<__default_alloc_vec_iter, __usm_host_alloc_vec_iter> && + !std::is_same_v<__usm_host_alloc_vec_iter, __usm_shared_alloc_vec_iter>>> + : std::true_type +{ +}; + } // namespace __internal template diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h index 1821301e911..9351b20dc88 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/utils_ranges_sycl.h @@ -22,6 +22,7 @@ #include "../../utils_ranges.h" #include "../../iterator_impl.h" #include "../../glue_numeric_defs.h" +#include "sycl_iterator.h" #include "sycl_defs.h" namespace oneapi @@ -206,6 +207,16 @@ struct is_passed_directly +struct is_passed_directly< + Iter, std::enable_if_t<(std::is_same_v> || + std::is_same_v>) && + oneapi::dpl::__internal::__vector_iter_distinguishes_by_allocator::value>> : + std::true_type +{ +}; + template struct is_passed_directly> : ::std::true_type { From 2f843192736bdd3ba6a6b332a70689981a1953cf Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Fri, 5 Apr 2024 12:49:30 +0200 Subject: [PATCH 16/29] [oneDPL][hetero] + missed synch between patterns,removed unnecessary synch, + comments --- .../dpl/pstl/hetero/algorithm_impl_hetero.h | 61 ++++++++++++++----- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h index 4374089568a..4ffc45b3047 100644 --- a/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h +++ b/include/oneapi/dpl/pstl/hetero/algorithm_impl_hetero.h @@ -948,7 +948,7 @@ __pattern_copy_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterato auto __res = __par_backend_hetero::__parallel_copy_if(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __buf1.all_view(), __buf2.all_view(), __n, __pred); - ::std::size_t __num_copied = __res.get(); + ::std::size_t __num_copied = __res.get(); //is a blocking call return __result_first + __num_copied; } @@ -1028,8 +1028,11 @@ __pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, auto __copy_last = __pattern_copy_if(__tag, __exec, __first, __last, __copy_first, __not_pred<_Predicate>{__pred}); - //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer - return __pattern_walk2( + //TODO: To optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer + // __pattern_copy_if above may be async due to there is implicit synchronization on sycl::buffer and the accessors + + //An explicit wait isn't required here because we have implicit synchronization on sycl::buffer destructor. + return __pattern_walk2( __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __copy_first, __copy_last, __first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } @@ -1049,7 +1052,9 @@ __pattern_unique(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _It auto __copy_last = __pattern_unique_copy(__tag, __exec, __first, __last, __copy_first, __pred); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer - return __pattern_walk2( __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __copy_first, __copy_last, __first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); @@ -1230,7 +1235,9 @@ __pattern_inplace_merge(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __ex __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::write>(__copy_first), __comp); //TODO: optimize copy back depending on Iterator, i.e. set_final_data for host iterator/pointer - __pattern_walk2( + + //An explicit wait isn't required here because we have implicit synchronization on sycl::buffer destructor. + __pattern_walk2( __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __copy_first, __copy_last, __first, __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); } @@ -1315,15 +1322,19 @@ __pattern_stable_partition(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& _ auto true_count = copy_result.first - __true_result; //TODO: optimize copy back if possible (inplace, decrease number of submits) - __pattern_walk2( + __pattern_walk2( __tag, __par_backend_hetero::make_wrapped_policy(__exec), __true_result, copy_result.first, __first, __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); - __pattern_walk2( + //We don't need synchronization between these patterns due to the data are being processed independently. + + __pattern_walk2( __tag, __par_backend_hetero::make_wrapped_policy(::std::forward<_ExecutionPolicy>(__exec)), __false_result, copy_result.second, __first + true_count, __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); + //An explicit wait isn't required here because we have implicit synchronization on sycl::buffer destructor. + return __first + true_count; } @@ -1486,11 +1497,17 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& { // If our output buffer is larger than the input buffer, simply copy elements to the output and use // full sort on them. - auto __out_end = __pattern_walk2( - __tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, __out_first, - __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); + auto __out_end = + __pattern_walk2(__tag, __par_backend_hetero::make_wrapped_policy<__initial_copy_1>(__exec), __first, __last, + __out_first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); - // Use regular sort as partial_sort isn't required to be stable + // TODO: __pattern_walk2 is a blocking call here, so there is a synchronization between the patterns. + // But, when the input iterators are a kind of hetero iterator on top of sycl::buffer, SYCL + // runtime makes a dependency graph. In that case the call of __pattern_walk2 could be changed to + // be asynchronous for better performance. + + // Use regular sort as partial_sort isn't required to be stable. + //__pattern_sort is a blocking call. __pattern_sort( __tag, __par_backend_hetero::make_wrapped_policy<__partial_sort_1>(::std::forward<_ExecutionPolicy>(__exec)), @@ -1514,15 +1531,21 @@ __pattern_partial_sort_copy(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& auto __buf_mid = __buf_first + __out_size; + // An explicit wait between the patterns isn't required here because we are working a with temporary + // sycl::buffer and sycl accessors. SYCL runtime makes a dependency graph to prevent the races between + // the patterns: __pattern_walk2, __parallel_partial_sort and __pattern_walk2. + __par_backend_hetero::__parallel_partial_sort( _BackendTag{}, __par_backend_hetero::make_wrapped_policy<__partial_sort_2>(__exec), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_first), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_mid), __par_backend_hetero::make_iter_mode<__par_backend_hetero::access_mode::read_write>(__buf_last), __comp); - return __pattern_walk2( + return __pattern_walk2( __tag, __par_backend_hetero::make_wrapped_policy<__copy_back>(::std::forward<_ExecutionPolicy>(__exec)), __buf_first, __buf_mid, __out_first, __brick_copy<__hetero_tag<_BackendTag>, _ExecutionPolicy>{}); + + //An explicit wait isn't required here because we have implicit synchronization on sycl::buffer destructor. } } @@ -1621,21 +1644,27 @@ __pattern_rotate(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Iterator auto __buf = __keep(__first, __last); auto __temp_buf = oneapi::dpl::__par_backend_hetero::__buffer<_ExecutionPolicy, _Tp>(__exec, __n); - auto __temp_rng = + auto __temp_rng_w = oneapi::dpl::__ranges::all_view<_Tp, __par_backend_hetero::access_mode::write>(__temp_buf.get_buffer()); const auto __shift = __new_first - __first; oneapi::dpl::__par_backend_hetero::__parallel_for( _BackendTag{}, oneapi::dpl::__par_backend_hetero::make_wrapped_policy<__rotate_wrapper>(__exec), unseq_backend::__rotate_copy::difference_type>{__n, __shift}, __n, - __buf.all_view(), __temp_rng); + __buf.all_view(), __temp_rng_w); + + //An explicit wait isn't required here because we are working with a temporary sycl::buffer and sycl accessors and + //SYCL runtime makes a dependency graph to prevent the races between two __parallel_for patterns. using _Function = __brick_move<__hetero_tag<_BackendTag>, _ExecutionPolicy>; auto __brick = unseq_backend::walk_n<_ExecutionPolicy, _Function>{_Function{}}; + auto __temp_rng_rw = + oneapi::dpl::__ranges::all_view<_Tp, __par_backend_hetero::access_mode::read_write>(__temp_buf.get_buffer()); oneapi::dpl::__par_backend_hetero::__parallel_for(_BackendTag{}, ::std::forward<_ExecutionPolicy>(__exec), __brick, - __n, __temp_rng, __buf.all_view()) - .wait(); + __n, __temp_rng_rw, __buf.all_view()); + + //An explicit wait doesn't need here because we have implicit synchronization (and wait) on sycl::buffer destructor. return __first + (__last - __new_first); } From 658e46561a8647f484269e2878eb87dd75bc1c5a Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Thu, 1 Feb 2024 17:52:20 +0100 Subject: [PATCH 17/29] [oneDPL][sycl] + sycl::is_device_copyable specialization of the SYCL trait for some oneDPL types --- .../dpl/pstl/hetero/dpcpp/sycl_traits.h | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h new file mode 100644 index 00000000000..c3cf1dcc1c8 --- /dev/null +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -0,0 +1,49 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Copyright (C) Intel Corporation +// +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// This file incorporates work covered by the following copyright and permission +// notice: +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// +//===----------------------------------------------------------------------===// + +// This file contains some specialization SYCL traits for some oneDPL types +// +// Include this header before a kernel submit SYCL code + +#ifndef _ONEDPL_SYCL_TRAITS_H +#define _ONEDPL_SYCL_TRAITS_H + +#define _ONEDPL_DEVICE_COPYABLE(TYPE) \ +template \ +struct sycl::is_device_copyable>: ::std::conjunction...> {}; + +using namespace oneapi::dpl::__internal; + +_ONEDPL_DEVICE_COPYABLE(__not_pred) +_ONEDPL_DEVICE_COPYABLE(__reorder_pred) +_ONEDPL_DEVICE_COPYABLE(__equal_value_by_pred) +_ONEDPL_DEVICE_COPYABLE(__equal_value) +_ONEDPL_DEVICE_COPYABLE(__not_equal_value) +_ONEDPL_DEVICE_COPYABLE(__transform_functor) +_ONEDPL_DEVICE_COPYABLE(__transform_if_unary_functor) +_ONEDPL_DEVICE_COPYABLE(__transform_if_binary_functor) +_ONEDPL_DEVICE_COPYABLE(__replace_functor) +_ONEDPL_DEVICE_COPYABLE(__replace_copy_functor) +_ONEDPL_DEVICE_COPYABLE(zip_forward_iterator) + +using namespace oneapi::dpl; + +_ONEDPL_DEVICE_COPYABLE(zip_iterator) +_ONEDPL_DEVICE_COPYABLE(transform_iterator) +_ONEDPL_DEVICE_COPYABLE(permutation_iterator) + +#undef _ONEDPL_DEVICE_COPYABLE + +#endif // _ONEDPL_SYCL_TRAITS_H From 8e943bbe953357d402a031a4f572e98bfec727b0 Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Mon, 12 Feb 2024 19:17:05 +0100 Subject: [PATCH 18/29] [oneDPL][sycl] + sycl::is_device_copyable specialization fixes --- .../dpl/pstl/hetero/dpcpp/sycl_traits.h | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index c3cf1dcc1c8..1153e7c2379 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -20,10 +20,20 @@ #ifndef _ONEDPL_SYCL_TRAITS_H #define _ONEDPL_SYCL_TRAITS_H +#if __INTEL_LLVM_COMPILER && (__INTEL_LLVM_COMPILER < 20240100) + +#define _ONEDPL_DEVICE_COPYABLE(TYPE) \ +template \ +struct sycl::is_device_copyable, ::std::enable_if_t>>>: ::std::conjunction...> {}; + +#else + #define _ONEDPL_DEVICE_COPYABLE(TYPE) \ template \ struct sycl::is_device_copyable>: ::std::conjunction...> {}; +#endif + using namespace oneapi::dpl::__internal; _ONEDPL_DEVICE_COPYABLE(__not_pred) @@ -38,6 +48,51 @@ _ONEDPL_DEVICE_COPYABLE(__replace_functor) _ONEDPL_DEVICE_COPYABLE(__replace_copy_functor) _ONEDPL_DEVICE_COPYABLE(zip_forward_iterator) +_ONEDPL_DEVICE_COPYABLE(fill_functor) +_ONEDPL_DEVICE_COPYABLE(generate_functor) +_ONEDPL_DEVICE_COPYABLE(__brick_fill) +_ONEDPL_DEVICE_COPYABLE(__brick_fill_n) +_ONEDPL_DEVICE_COPYABLE(__search_n_unary_predicate) +_ONEDPL_DEVICE_COPYABLE(__is_heap_check) + +_ONEDPL_DEVICE_COPYABLE(equal_predicate) +_ONEDPL_DEVICE_COPYABLE(adjacent_find_fn) +_ONEDPL_DEVICE_COPYABLE(__create_mask_unique_copy) + +_ONEDPL_DEVICE_COPYABLE(__op_uninitialized_fill) + +using namespace oneapi::dpl::__par_backend_hetero; + +_ONEDPL_DEVICE_COPYABLE(__early_exit_find_or); + +using namespace oneapi::dpl::unseq_backend; + +_ONEDPL_DEVICE_COPYABLE(walk_n) +_ONEDPL_DEVICE_COPYABLE(walk_adjacent_difference) +_ONEDPL_DEVICE_COPYABLE(transform_reduce) +_ONEDPL_DEVICE_COPYABLE(reduce_over_group) +_ONEDPL_DEVICE_COPYABLE(single_match_pred_by_idx) +_ONEDPL_DEVICE_COPYABLE(multiple_match_pred) +_ONEDPL_DEVICE_COPYABLE(n_elem_match_pred) +_ONEDPL_DEVICE_COPYABLE(first_match_pred) +_ONEDPL_DEVICE_COPYABLE(__create_mask) +_ONEDPL_DEVICE_COPYABLE(__copy_by_mask) +_ONEDPL_DEVICE_COPYABLE(__partition_by_mask) +_ONEDPL_DEVICE_COPYABLE(__global_scan_functor) +_ONEDPL_DEVICE_COPYABLE(__scan) +_ONEDPL_DEVICE_COPYABLE(__brick_includes) +_ONEDPL_DEVICE_COPYABLE(__brick_set_op) +_ONEDPL_DEVICE_COPYABLE(__brick_reduce_idx) + +using namespace oneapi::dpl::internal; + +_ONEDPL_DEVICE_COPYABLE(custom_brick) +_ONEDPL_DEVICE_COPYABLE(replace_if_fun) +_ONEDPL_DEVICE_COPYABLE(scan_by_key_fun) +_ONEDPL_DEVICE_COPYABLE(segmented_scan_fun) +_ONEDPL_DEVICE_COPYABLE(scatter_and_accumulate_fun) +_ONEDPL_DEVICE_COPYABLE(transform_if_stencil_fun) + using namespace oneapi::dpl; _ONEDPL_DEVICE_COPYABLE(zip_iterator) From f31af793f444d39cc38cf807f1dd548fb11dac37 Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Tue, 13 Feb 2024 14:32:28 +0100 Subject: [PATCH 19/29] [oneDPL][sycl] sycl::is_device_copyable specialization: + clang format --- .../oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index 1153e7c2379..bc6dfb1d85a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -22,15 +22,20 @@ #if __INTEL_LLVM_COMPILER && (__INTEL_LLVM_COMPILER < 20240100) -#define _ONEDPL_DEVICE_COPYABLE(TYPE) \ -template \ -struct sycl::is_device_copyable, ::std::enable_if_t>>>: ::std::conjunction...> {}; +# define _ONEDPL_DEVICE_COPYABLE(TYPE) \ + template \ + struct sycl::is_device_copyable, ::std::enable_if_t>>> \ + : ::std::conjunction...> \ + { \ + }; #else -#define _ONEDPL_DEVICE_COPYABLE(TYPE) \ -template \ -struct sycl::is_device_copyable>: ::std::conjunction...> {}; +# define _ONEDPL_DEVICE_COPYABLE(TYPE) \ + template \ + struct sycl::is_device_copyable> : ::std::conjunction...> \ + { \ + }; #endif From 35bb6bfb8e04ccb86cdfa024a3856dd887638186 Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Tue, 13 Feb 2024 15:36:12 +0100 Subject: [PATCH 20/29] [oneDPL][sycl] sycl::is_device_copyable specialization: + a comment --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index bc6dfb1d85a..d4da0b406b8 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -13,9 +13,13 @@ // //===----------------------------------------------------------------------===// -// This file contains some specialization SYCL traits for some oneDPL types +// This file contains some specialization SYCL traits for some oneDPL types. // -// Include this header before a kernel submit SYCL code +// Fancy iterators and internal functors which are device copyable when their +// template arguments are also device copyable should be explicitly specialized +// as such. This is important when template argument member variables may be +// device copyable but not trivially copyable. +// Include this header before a kernel submit SYCL code. #ifndef _ONEDPL_SYCL_TRAITS_H #define _ONEDPL_SYCL_TRAITS_H From 80cf60bb9226de19d86c5ef789ce5f7bc8f5f6ae Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Tue, 13 Feb 2024 15:53:35 +0100 Subject: [PATCH 21/29] [oneDPL][sycl] + #include "sycl_traits.h" --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index d60d5b3626e..7cda428e542 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -40,6 +40,8 @@ # include "parallel_backend_sycl_radix_sort.h" #endif +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + namespace oneapi { namespace dpl From 89c92c3e1378eaee207d0d88edaad50f718d78ef Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Wed, 14 Feb 2024 14:31:46 +0100 Subject: [PATCH 22/29] [oneDPL][sycl] + forward declaration for __early_exit_find_or --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index d4da0b406b8..cf2b2a0b7d1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -72,6 +72,9 @@ _ONEDPL_DEVICE_COPYABLE(__op_uninitialized_fill) using namespace oneapi::dpl::__par_backend_hetero; +template +struct __early_exit_find_or; + _ONEDPL_DEVICE_COPYABLE(__early_exit_find_or); using namespace oneapi::dpl::unseq_backend; From 60884a2975c8e70a52d70ab90670fd913a824d54 Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Wed, 14 Feb 2024 16:21:20 +0100 Subject: [PATCH 23/29] [oneDPL][sycl] + necessary includes --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index cf2b2a0b7d1..b7cbda6654f 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -24,6 +24,12 @@ #ifndef _ONEDPL_SYCL_TRAITS_H #define _ONEDPL_SYCL_TRAITS_H +#include "../../../internal/function.h" +#include "../../../binary_search_impl.h" +#include "unseq_backend_sycl.h" +#include "../../utils.h" +#include "../../iterator_impl.h" + #if __INTEL_LLVM_COMPILER && (__INTEL_LLVM_COMPILER < 20240100) # define _ONEDPL_DEVICE_COPYABLE(TYPE) \ From ddaf6fe69cb3e8e2bddccc2a74e1a0d2cb25c6d7 Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Wed, 14 Feb 2024 18:01:14 +0100 Subject: [PATCH 24/29] Revert "[oneDPL][sycl] + necessary includes" This reverts commit cb7259d85e8e711236a022061b2a3eb301ce76fd. --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index b7cbda6654f..cf2b2a0b7d1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -24,12 +24,6 @@ #ifndef _ONEDPL_SYCL_TRAITS_H #define _ONEDPL_SYCL_TRAITS_H -#include "../../../internal/function.h" -#include "../../../binary_search_impl.h" -#include "unseq_backend_sycl.h" -#include "../../utils.h" -#include "../../iterator_impl.h" - #if __INTEL_LLVM_COMPILER && (__INTEL_LLVM_COMPILER < 20240100) # define _ONEDPL_DEVICE_COPYABLE(TYPE) \ From 797498d2114389c435219de7102f2b22a08402c7 Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Thu, 15 Feb 2024 15:49:23 +0100 Subject: [PATCH 25/29] [oneDPL][sycl] + necessary forward declarations --- .../dpl/pstl/hetero/dpcpp/sycl_traits.h | 42 +++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index cf2b2a0b7d1..ae99147ac77 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -57,19 +57,37 @@ _ONEDPL_DEVICE_COPYABLE(__replace_functor) _ONEDPL_DEVICE_COPYABLE(__replace_copy_functor) _ONEDPL_DEVICE_COPYABLE(zip_forward_iterator) +template +struct fill_functor; + +template +struct generate_functor; + +template +struct equal_predicate; + +template +struct __search_n_unary_predicate; + +template +struct adjacent_find_fn; + +template +struct __is_heap_check; + +template +struct __create_mask_unique_copy; + _ONEDPL_DEVICE_COPYABLE(fill_functor) _ONEDPL_DEVICE_COPYABLE(generate_functor) _ONEDPL_DEVICE_COPYABLE(__brick_fill) _ONEDPL_DEVICE_COPYABLE(__brick_fill_n) _ONEDPL_DEVICE_COPYABLE(__search_n_unary_predicate) _ONEDPL_DEVICE_COPYABLE(__is_heap_check) - _ONEDPL_DEVICE_COPYABLE(equal_predicate) _ONEDPL_DEVICE_COPYABLE(adjacent_find_fn) _ONEDPL_DEVICE_COPYABLE(__create_mask_unique_copy) -_ONEDPL_DEVICE_COPYABLE(__op_uninitialized_fill) - using namespace oneapi::dpl::__par_backend_hetero; template @@ -98,6 +116,24 @@ _ONEDPL_DEVICE_COPYABLE(__brick_reduce_idx) using namespace oneapi::dpl::internal; +template +struct custom_brick; + +template +struct replace_if_fun; + +template +class transform_if_stencil_fun; + +template +struct segmented_scan_fun; + +template +class scatter_and_accumulate_fun; + +template +struct scan_by_key_fun; + _ONEDPL_DEVICE_COPYABLE(custom_brick) _ONEDPL_DEVICE_COPYABLE(replace_if_fun) _ONEDPL_DEVICE_COPYABLE(scan_by_key_fun) From 03c69f18621a9366fe1cde6855852498eaedebb0 Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Thu, 15 Feb 2024 16:55:42 +0100 Subject: [PATCH 26/29] [oneDPL][sycl] include place changed --- include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 -- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h | 2 ++ .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h | 2 ++ .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h | 2 ++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 7cda428e542..d60d5b3626e 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -40,8 +40,6 @@ # include "parallel_backend_sycl_radix_sort.h" #endif -#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. - namespace oneapi { namespace dpl diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 95d23fc16e9..7baee78b1b1 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -34,6 +34,8 @@ #include "../../iterator_impl.h" #include "sycl_iterator.h" +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + namespace oneapi { namespace dpl diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index e7ac2ba50e1..ee864a53594 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -29,6 +29,8 @@ #include "../../histogram_binhash_utils.h" #include "../../utils.h" +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + namespace oneapi { namespace dpl diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index cd474afbf1f..a2c1bda6a35 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -26,6 +26,8 @@ #include "unseq_backend_sycl.h" #include "utils_ranges_sycl.h" +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + namespace oneapi { namespace dpl From 1047d4a885a48e45a4c77f21e16b6cfa85ce2f95 Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Thu, 22 Feb 2024 16:48:41 +0100 Subject: [PATCH 27/29] [oneDPL][sycl][dpcpp] #include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. --- .../dpl/experimental/kt/internal/esimd_radix_sort_submitters.h | 1 + include/oneapi/dpl/internal/reduce_by_segment_impl.h | 1 + include/oneapi/dpl/internal/scan_by_segment_impl.h | 2 ++ include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 2 ++ .../dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h | 2 ++ .../pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h | 2 ++ 6 files changed, 10 insertions(+) diff --git a/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_submitters.h b/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_submitters.h index b6432826ebb..a44592166e0 100644 --- a/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_submitters.h +++ b/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_submitters.h @@ -18,6 +18,7 @@ #include "../../../pstl/hetero/dpcpp/utils_ranges_sycl.h" #include "../../../pstl/hetero/dpcpp/parallel_backend_sycl_utils.h" +#include "../../../pstl/hetero/dpcpp/sycl_traits.h" //SYCL traits specialization for some oneDPL types. #include "esimd_radix_sort_kernels.h" #include "esimd_defs.h" diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index 78986bca302..683decf3a49 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -59,6 +59,7 @@ #include "../pstl/ranges_defs.h" #include "../pstl/glue_algorithm_ranges_defs.h" #include "../pstl/glue_algorithm_ranges_impl.h" +#include "../pstl/hetero/dpcpp/sycl_traits.h" //SYCL traits specialization for some oneDPL types. #include "scan_by_segment_impl.h" #endif diff --git a/include/oneapi/dpl/internal/scan_by_segment_impl.h b/include/oneapi/dpl/internal/scan_by_segment_impl.h index 15606d05f8e..85787b38ac3 100644 --- a/include/oneapi/dpl/internal/scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/scan_by_segment_impl.h @@ -46,6 +46,8 @@ #include "../pstl/hetero/dpcpp/unseq_backend_sycl.h" #include "../pstl/hetero/dpcpp/parallel_backend_sycl_utils.h" +#include "../pstl/hetero/dpcpp/sycl_traits.h" //SYCL traits specialization for some oneDPL types. + namespace oneapi { namespace dpl diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index d60d5b3626e..7cda428e542 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -40,6 +40,8 @@ # include "parallel_backend_sycl_radix_sort.h" #endif +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + namespace oneapi { namespace dpl diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index 4fd1aa1bef9..b46fb50c831 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -25,6 +25,8 @@ #include "parallel_backend_sycl_utils.h" #include "execution_sycl_defs.h" +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + #define _ONEDPL_RADIX_WORKLOAD_TUNING 1 //To achieve better performance, number of segments and work-group size are variated depending on a number of elements: //1. 32K...512K - number of segments is increased up to 8 times diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h index 6915f2b09e5..fbf80582d43 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort_one_wg.h @@ -16,6 +16,8 @@ #ifndef _ONEDPL_parallel_backend_sycl_radix_sort_one_wg_H #define _ONEDPL_parallel_backend_sycl_radix_sort_one_wg_H +#include "sycl_traits.h" //SYCL traits specialization for some oneDPL types. + //The file is an internal file and the code of that file is included by a major file into the following namespaces: //namespace oneapi //{ From db830066ec7056ceb0605329b302b30475294f5d Mon Sep 17 00:00:00 2001 From: Mikhail Dvorskiy Date: Mon, 8 Apr 2024 14:49:12 +0200 Subject: [PATCH 28/29] [oneDPL] removed _ONEDPL_DEVICE_COPYABLE(zip_forward_iterator) due to zip_forward_iteratoris not used in the device code --- include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h index ae99147ac77..7003dffc96a 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h @@ -55,7 +55,6 @@ _ONEDPL_DEVICE_COPYABLE(__transform_if_unary_functor) _ONEDPL_DEVICE_COPYABLE(__transform_if_binary_functor) _ONEDPL_DEVICE_COPYABLE(__replace_functor) _ONEDPL_DEVICE_COPYABLE(__replace_copy_functor) -_ONEDPL_DEVICE_COPYABLE(zip_forward_iterator) template struct fill_functor; From 5e984348990828188c17b2a49a0fe17ec4afab32 Mon Sep 17 00:00:00 2001 From: "Mateusz P. Nowak" Date: Mon, 8 Apr 2024 13:40:31 +0000 Subject: [PATCH 29/29] remove matrices and logs --- .../detail/format_shim.hpp | 13 - .../distributed_ranges_impl/detail/logger.hpp | 114 ----- .../internal/distributed_ranges_impl/shp.hpp | 5 +- .../shp/algorithms/algorithms.hpp | 1 - .../shp/algorithms/matrix/gemm.hpp | 245 ----------- .../shp/algorithms/matrix/gemv.hpp | 208 --------- .../shp/algorithms/matrix/local_gemm.hpp | 84 ---- .../shp/algorithms/matrix/local_gemv.hpp | 100 ----- .../algorithms/matrix/matrix_algorithms.hpp | 8 - .../containers/distributed_dense_matrix.hpp | 332 -------------- .../shp/containers/matrix_entry.hpp | 232 ---------- .../shp/containers/matrix_partition.hpp | 115 ----- .../containers/sequential/dense_matrix.hpp | 143 ------ .../shp/containers/sparse_matrix.hpp | 413 ------------------ .../distributed_ranges_impl/shp/util.hpp | 45 +- .../shp/util/coo_matrix.hpp | 170 ------- .../shp/util/generate_random.hpp | 92 ---- .../shp/util/matrix_io.hpp | 289 ------------ .../shp/views/csr_matrix_view.hpp | 225 ---------- .../shp/views/dense_column_view.hpp | 111 ----- .../shp/views/dense_matrix_iterator.hpp | 109 ----- .../shp/views/dense_matrix_view.hpp | 124 ------ .../shp/views/dense_row_view.hpp | 109 ----- .../shp/views/standard_views.hpp | 6 +- .../source_location/source_location.hpp | 65 --- .../common/distributed_vector.cpp | 13 - test/distributed-ranges/shp/CMakeLists.txt | 2 +- test/distributed-ranges/shp/gemv.cpp | 35 -- test/distributed-ranges/shp/xhp-tests.hpp | 5 +- 29 files changed, 32 insertions(+), 3381 deletions(-) delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/detail/format_shim.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp delete mode 100644 include/oneapi/dpl/internal/distributed_ranges_impl/source_location/source_location.hpp delete mode 100644 test/distributed-ranges/shp/gemv.cpp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/format_shim.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/format_shim.hpp deleted file mode 100644 index 9eef4c8bb49..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/format_shim.hpp +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#ifdef DR_FORMAT -#include -#include -#endif - -// Workaround for doxygen warning about internal inconsistency -namespace fmt {} diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp deleted file mode 100644 index 2b342936425..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/detail/logger.hpp +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -#include "../source_location/source_location.hpp" - -#include "format_shim.hpp" -#include "ranges_shim.hpp" - -namespace experimental::dr { - -class timer { -public: - timer() : begin_(std::chrono::high_resolution_clock::now()) {} - - auto elapsed() { - auto end = std::chrono::high_resolution_clock::now(); - return std::chrono::duration(end - begin_).count(); - } - -private: - std::chrono::time_point begin_; -}; - -class logger { -public: - enum filters { base, for_each, transpose, mdspan_view, mpi, last }; - - logger() { rng::fill(enabled_, true); } - - void set_file(std::ofstream &fout) { fout_ = &fout; } - - void filter(const std::vector &names) { - if (names.size() == 0) { - return; - } - - // Disable everything - rng::fill(enabled_, false); - - // Enabled selected filters - for (const auto &name : names) { - std::size_t index = filters::last; - for (std::size_t i = 0; i < filter_names_.size(); i++) { - if (name == filter_names_[i]) { - index = i; - } - } - if (index == filters::last) { - std::cerr << "Ignoring unrecognized filter: " << name << "\n"; - } else { - enabled_[index] = true; - } - } - } - -#ifdef DR_FORMAT - - template - void debug(const nostd::source_location &location, - fmt::format_string format, Args &&...args) { - if (fout_ && enabled_[filters::base]) { - *fout_ << fmt::format(format, std::forward(args)...) << " <" - << location.file_name() << ":" << location.line() << ">\n"; - fout_->flush(); - } - } - - template - void debug(fmt::format_string format, Args &&...args) { - debug(filters::base, format, std::forward(args)...); - } - - template - void debug(filters filter, fmt::format_string format, - Args &&...args) { - if (fout_ && enabled_[filter]) { - *fout_ << fmt::format(format, std::forward(args)...); - fout_->flush(); - } - } - -#else - - template - void debug(const nostd::source_location &location, std::string format, - Args &&...args) {} - - template void debug(std::string format, Args &&...args) {} - - template - void debug(filters filter, std::string format, Args &&...args) {} - -#endif - -private: - std::ofstream *fout_ = nullptr; - std::array enabled_; - std::array filter_names_ = { - "base", "for_each", "transpose", "mdspan_view", "mpi"}; -}; - -inline logger drlog; - -#define DRLOG(...) \ - experimental::dr::drlog.debug(nostd::source_location::current(), __VA_ARGS__) - -} // namespace experimental::dr diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp index 4874f553c86..38795eae3d6 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp.hpp @@ -4,10 +4,8 @@ #pragma once -#include "detail/logger.hpp" +// #include "detail/logger.hpp" #include "shp/algorithms/algorithms.hpp" -#include "shp/containers/distributed_dense_matrix.hpp" -#include "shp/containers/sparse_matrix.hpp" #include "shp/detail.hpp" #include "shp/distributed_span.hpp" #include "shp/distributed_vector.hpp" @@ -15,6 +13,5 @@ #include "shp/range.hpp" #include "shp/range_adaptors.hpp" #include "shp/util.hpp" -#include "shp/util/matrix_io.hpp" #include "shp/views/views.hpp" #include "views/views.hpp" diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/algorithms.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/algorithms.hpp index a79d46451b3..369896d2d27 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/algorithms.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/algorithms.hpp @@ -11,7 +11,6 @@ #include "for_each.hpp" #include "inclusive_scan.hpp" #include "iota.hpp" -#include "matrix/matrix_algorithms.hpp" #include "reduce.hpp" #include "sort.hpp" #include "transform.hpp" diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp deleted file mode 100644 index 21f5a803e72..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemm.hpp +++ /dev/null @@ -1,245 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include - -namespace experimental::dr::shp { - -template -void gemm(distributed_dense_matrix &a, distributed_dense_matrix &b, - distributed_dense_matrix &c) { - gemm_buffered(a, b, c); -} - -template -void gemm_inplace(distributed_dense_matrix &a, - distributed_dense_matrix &b, - distributed_dense_matrix &c) { - // Matrix dimensions must match (algorithm requirement) - assert(c.shape()[0] == a.shape()[0]); - assert(c.shape()[1] == b.shape()[1]); - assert(a.shape()[1] == b.shape()[0]); - - // Tile grid dimensions must match (implementation limitation) - - assert(c.grid_shape()[0] == a.grid_shape()[0]); - assert(c.grid_shape()[1] == b.grid_shape()[1]); - assert(a.grid_shape()[1] == b.grid_shape()[0]); - - std::vector events; - events.reserve(c.grid_shape()[0] * c.grid_shape()[1] * a.grid_shape()[1]); - - for (std::size_t i = 0; i < c.grid_shape()[0]; i++) { - for (std::size_t j = 0; j < c.grid_shape()[1]; j++) { - // For each tile of the output C matrix - auto &&c_tile = c.tile({i, j}); - - std::vector local_events; - local_events.reserve(a.grid_shape()[1]); - - std::size_t k_offset = i + j; - for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { - std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - - auto &&a_tile = a.tile({i, k}); - auto &&b_tile = b.tile({k, j}); - - auto &&q = __detail::queue(experimental::dr::ranges::rank(c_tile)); - - auto e = __detail::local_gemm(q, __detail::local(a_tile), - __detail::local(b_tile), - __detail::local(c_tile), local_events); - - local_events.push_back(e); - } - - for (auto &&e : local_events) { - events.push_back(e); - } - } - } - - __detail::wait(events); -} - -template -void gemm_buffered(distributed_dense_matrix &a, - distributed_dense_matrix &b, - distributed_dense_matrix &c) { - // Matrix dimensions must match (algorithm requirement) - assert(c.shape()[0] == a.shape()[0]); - assert(c.shape()[1] == b.shape()[1]); - assert(a.shape()[1] == b.shape()[0]); - - // Tile grid dimensions must match (implementation limitation) - - assert(c.grid_shape()[0] == a.grid_shape()[0]); - assert(c.grid_shape()[1] == b.grid_shape()[1]); - assert(a.grid_shape()[1] == b.grid_shape()[0]); - - std::vector threads; - - std::atomic communication = 0; - std::atomic compute = 0; - - for (std::size_t i = 0; i < c.grid_shape()[0]; i++) { - for (std::size_t j = 0; j < c.grid_shape()[1]; j++) { - auto c_local = c.tile({i, j}); - - threads.emplace_back([c_local, i, j, &a, &b, &communication, &compute] { - auto &&q = __detail::queue(experimental::dr::ranges::rank(c_local)); - - std::size_t a_elem = a.tile_shape()[0] * a.tile_shape()[1]; - std::size_t b_elem = b.tile_shape()[0] * b.tile_shape()[1]; - std::size_t buffer_size = std::max(a_elem, b_elem); - - experimental::dr::shp::device_allocator gpu_allocator(q); - experimental::dr::shp::buffered_allocator buffered_allocator(gpu_allocator, - buffer_size, 2); - auto &&allocator = buffered_allocator; - - std::size_t k_offset = i + j; - - for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { - std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - - auto begin = std::chrono::high_resolution_clock::now(); - auto a_tile = a.get_tile({i, k}, allocator); - auto b_tile = b.get_tile({k, j}, allocator); - auto end = std::chrono::high_resolution_clock::now(); - double duration = std::chrono::duration(end - begin).count(); - communication += duration; - - experimental::dr::shp::dense_matrix_view a_local(a_tile); - experimental::dr::shp::dense_matrix_view b_local(b_tile); - - begin = std::chrono::high_resolution_clock::now(); - __detail::local_gemm(q, __detail::local(a_local), - __detail::local(b_local), - __detail::local(c_local)) - .wait(); - end = std::chrono::high_resolution_clock::now(); - duration = std::chrono::duration(end - begin).count(); - compute += duration; - } - }); - } - } - - for (auto &&t : threads) { - t.join(); - } - - bool debug_print = false; - - if (debug_print) { - std::cout << "communication total: " << (double)communication << std::endl; - std::cout << "compute total: " << (double)compute << std::endl; - } -} - -template -void gemm_buffered_async(distributed_dense_matrix &a, - distributed_dense_matrix &b, - distributed_dense_matrix &c) { - // Matrix dimensions must match (algorithm requirement) - assert(c.shape()[0] == a.shape()[0]); - assert(c.shape()[1] == b.shape()[1]); - assert(a.shape()[1] == b.shape()[0]); - - // Tile grid dimensions must match (implementation limitation) - - assert(c.grid_shape()[0] == a.grid_shape()[0]); - assert(c.grid_shape()[1] == b.grid_shape()[1]); - assert(a.grid_shape()[1] == b.grid_shape()[0]); - - std::vector threads; - - std::atomic issue = 0; - std::atomic sync = 0; - std::atomic compute = 0; - - for (std::size_t i = 0; i < c.grid_shape()[0]; i++) { - for (std::size_t j = 0; j < c.grid_shape()[1]; j++) { - auto c_local = c.tile({i, j}); - - threads.emplace_back([c_local, i, j, &a, &b, &issue, &sync, &compute] { - auto &&q = __detail::queue(experimental::dr::ranges::rank(c_local)); - - std::size_t a_elem = a.tile_shape()[0] * a.tile_shape()[1]; - std::size_t b_elem = b.tile_shape()[0] * b.tile_shape()[1]; - std::size_t buffer_size = std::max(a_elem, b_elem); - - experimental::dr::shp::device_allocator gpu_allocator(q); - experimental::dr::shp::buffered_allocator buffered_allocator(gpu_allocator, - buffer_size, 4); - auto &&allocator = buffered_allocator; - - std::size_t k_offset = i + j; - - auto begin = std::chrono::high_resolution_clock::now(); - auto a_f = - a.get_tile_async({i, k_offset % a.grid_shape()[1]}, allocator); - // a_f.wait(); - auto b_f = - b.get_tile_async({k_offset % a.grid_shape()[1], j}, allocator); - // b_f.wait(); - auto end = std::chrono::high_resolution_clock::now(); - double duration = std::chrono::duration(end - begin).count(); - issue += duration; - - for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { - std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - - auto begin = std::chrono::high_resolution_clock::now(); - auto a_tile = a_f.get(); - auto b_tile = b_f.get(); - auto end = std::chrono::high_resolution_clock::now(); - double duration = std::chrono::duration(end - begin).count(); - sync += duration; - - experimental::dr::shp::dense_matrix_view a_local(a_tile); - experimental::dr::shp::dense_matrix_view b_local(b_tile); - - if (k_ + 1 < a.grid_shape()[1]) { - begin = std::chrono::high_resolution_clock::now(); - a_f = a.get_tile_async({i, (k + 1) % a.grid_shape()[1]}, allocator); - // a_f.wait(); - b_f = b.get_tile_async({(k + 1) % a.grid_shape()[1], j}, allocator); - // b_f.wait(); - end = std::chrono::high_resolution_clock::now(); - duration = std::chrono::duration(end - begin).count(); - issue += duration; - } - - begin = std::chrono::high_resolution_clock::now(); - __detail::local_gemm(q, __detail::local(a_local), - __detail::local(b_local), - __detail::local(c_local)) - .wait(); - end = std::chrono::high_resolution_clock::now(); - duration = std::chrono::duration(end - begin).count(); - compute += duration; - } - }); - } - } - - for (auto &&t : threads) { - t.join(); - } - - bool debug_print = false; - - if (debug_print) { - std::cout << "sync total: " << (double)sync << std::endl; - std::cout << "issue total: " << (double)issue << std::endl; - std::cout << "compute total: " << (double)compute << std::endl; - } -} - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp deleted file mode 100644 index f90aa1a0e41..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/gemv.hpp +++ /dev/null @@ -1,208 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace experimental::dr::shp { - -template -void flat_gemv(C &&c, experimental::dr::shp::sparse_matrix &a, B &&b) { - assert(c.size() == b.size()); - assert(a.shape()[1] == b.size()); - assert(a.grid_shape()[0] == c.segments().size()); - assert(a.grid_shape()[1] == 1); - - auto &&devices = experimental::dr::shp::devices(); - - using b_scalar_type = rng::range_value_t; - - using local_vector_type = - experimental::dr::shp::device_vector>; - - std::vector local_b; - std::vector copy_events; - std::vector comp_events; - - for (std::size_t i = 0; i < devices.size(); i++) { - experimental::dr::shp::device_allocator allocator(experimental::dr::shp::context(), devices[i]); - local_b.push_back(local_vector_type(b.size(), allocator, i)); - } - - for (auto &&l_b : local_b) { - auto event = - experimental::dr::shp::copy_async(b.begin(), b.end(), experimental::dr::ranges::local(l_b.begin())); - copy_events.push_back(event); - } - - for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - auto a_tile = a.tile(experimental::dr::index(i, 0)); - - auto a_iter = a_tile.begin(); - auto b_iter = experimental::dr::ranges::local(local_b[i].begin()); - auto c_iter = experimental::dr::ranges::local(c.segments()[i].begin()); - - auto &&q = __detail::queue(a_tile.rank()); - - auto event = q.submit([&](auto &&h) { - h.depends_on(copy_events[a_tile.rank()]); - h.parallel_for(a_tile.size(), [=](auto idx) { - auto &&[index, a_v] = *(a_iter + idx); - auto &&[i, k] = index; - auto &&b_v = *(b_iter + k); - auto &&c_v = *(c_iter + i); - sycl::atomic_ref - c_ref(c_v); - c_ref += a_v * b_v; - }); - }); - comp_events.push_back(event); - } - - __detail::wait(comp_events); -} - -template -void gemv(C &&c, experimental::dr::shp::sparse_matrix &a, B &&b, - shp::duplicated_vector> &scratch) { - assert(c.size() == b.size()); - assert(a.shape()[1] == b.size()); - assert(a.grid_shape()[0] == c.segments().size()); - assert(a.grid_shape()[1] == 1); - - auto &&b_duplicated = scratch; - - std::vector copy_events; - std::vector comp_events; - copy_events.reserve(shp::nprocs()); - comp_events.reserve(a.grid_shape()[0]); - - for (std::size_t i = 0; i < shp::nprocs(); i++) { - auto &&l_b = b_duplicated.local_vector(i); - auto event = experimental::dr::shp::copy_async(b.begin(), b.end(), l_b.begin()); - copy_events.push_back(event); - } - - for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - auto a_tile = a.tile(experimental::dr::index(i, 0)); - - auto b_iter = - experimental::dr::ranges::local(b_duplicated.local_vector(a_tile.rank()).begin()); - auto c_iter = experimental::dr::ranges::local(c.segments()[i].begin()); - - auto &&q = __detail::queue(a_tile.rank()); - - auto event = __detail::local_gemv(q, a_tile, b_iter, c_iter, - {copy_events[a_tile.rank()]}); - comp_events.push_back(event); - } - - __detail::wait(comp_events); -} - -template -void gemv(C &&c, experimental::dr::shp::sparse_matrix &a, B &&b) { - experimental::dr::shp::duplicated_vector> b_duplicated(b.size()); - - gemv(c, a, b, b_duplicated); -} - -template -void gemv_square(C &&c, experimental::dr::shp::sparse_matrix &a, B &&b) { - assert(a.shape()[0] == c.size()); - assert(a.shape()[1] == b.size()); - assert(a.grid_shape()[0] == c.segments().size()); - assert(a.grid_shape()[1] == b.segments().size()); - - std::vector events; - - for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - std::size_t k_offset = i; - for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { - std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - auto a_tile = a.tile(experimental::dr::index(i, k)); - auto b_segment = b.segments()[k]; - auto c_segment = c.segments()[i]; - - auto b_iter = experimental::dr::ranges::local(b_segment.begin()); - auto c_iter = experimental::dr::ranges::local(c_segment.begin()); - - auto &&q = __detail::queue(a_tile.rank()); - - auto event = __detail::custom_gemv(q, a_tile, b_iter, c_iter); - events.push_back(event); - } - } - - __detail::wait(events); -} - -template -void gemv_square_copy(C &&c, experimental::dr::shp::sparse_matrix &a, B &&b) { - assert(a.shape()[0] == c.size()); - assert(a.shape()[1] == b.size()); - assert(a.grid_shape()[0] == c.segments().size()); - assert(a.grid_shape()[1] == b.segments().size()); - - auto &&devices = experimental::dr::shp::devices(); - - using b_scalar_type = rng::range_value_t; - - using local_vector_type = - experimental::dr::shp::device_vector>; - - std::vector local_b; - std::vector events; - - local_b.reserve(a.grid_shape()[0]); - - for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - experimental::dr::shp::device_allocator allocator( - experimental::dr::shp::context(), devices[a.tile(experimental::dr::index(i, 0)).rank()]); - local_b.emplace_back(b.size(), allocator, - a.tile(experimental::dr::index(i, 0)).rank()); - } - - for (std::size_t i = 0; i < a.grid_shape()[0]; i++) { - std::size_t k_offset = i; - for (std::size_t k_ = 0; k_ < a.grid_shape()[1]; k_++) { - std::size_t k = (k_ + k_offset) % a.grid_shape()[1]; - auto a_tile = a.tile({i, k}); - auto b_iter = local_b[i].begin() + (k * a.tile_shape()[1]); - auto c_iter = c.segments()[i].begin(); - - auto &&b_segment = b.segments()[k]; - auto &&q = __detail::queue(a_tile.rank()); - - auto ce = - experimental::dr::shp::copy_async(q, b_segment.begin(), b_segment.end(), b_iter); - - auto event = __detail::custom_gemv(q, a_tile, b_iter.local(), - c_iter.local(), {ce}); - - events.push_back(event); - } - } - - __detail::wait(events); -} - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp deleted file mode 100644 index f124d2f2231..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemm.hpp +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -#ifdef USE_MKL -#include -#endif - -namespace experimental::dr::shp { - -namespace __detail { - -template -auto custom_gemm(sycl::queue &q, shp::dense_matrix_view a, - shp::dense_matrix_view b, shp::dense_matrix_view c, - const std::vector &dependencies = {}) { - assert(c.shape()[0] == a.shape()[0]); - assert(c.shape()[1] == b.shape()[1]); - assert(a.shape()[1] == b.shape()[0]); - - std::size_t M = c.shape()[0]; - std::size_t N = c.shape()[1]; - std::size_t K = a.shape()[1]; - - auto a_p = a.data(); - auto b_p = b.data(); - auto c_p = c.data(); - - auto e = q.parallel_for(sycl::range<3>{M, K, N}, [=](auto idx) { - auto i = idx[0]; - auto k = idx[1]; - auto j = idx[2]; - - sycl::atomic_ref - c_ref(c_p[i * N + j]); - - c_ref += a_p[i * K + k] * b_p[k * N + j]; - }); - return e; -} - -#ifdef USE_MKL - -template -auto mkl_gemm(sycl::queue &q, shp::dense_matrix_view a, - shp::dense_matrix_view b, shp::dense_matrix_view c, - const std::vector &dependencies = {}) { - assert(c.shape()[0] == a.shape()[0]); - assert(c.shape()[1] == b.shape()[1]); - assert(a.shape()[1] == b.shape()[0]); - - auto event = oneapi::mkl::blas::row_major::gemm( - q, oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::nontrans, - c.shape()[0], c.shape()[1], a.shape()[1], T(1), a.data(), a.ld(), - b.data(), b.ld(), T(1), c.data(), c.ld(), dependencies); - - return event; -} - -template -auto local_gemm(sycl::queue &q, shp::dense_matrix_view a, - shp::dense_matrix_view b, shp::dense_matrix_view c, - const std::vector &dependencies = {}) { - return mkl_gemm(q, a, b, c, dependencies); -} - -#else - -template -auto local_gemm(sycl::queue &q, shp::dense_matrix_view a, - shp::dense_matrix_view b, shp::dense_matrix_view c, - const std::vector &dependencies = {}) { - return custom_gemm(q, a, b, c, dependencies); -} - -#endif - -} // namespace __detail - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp deleted file mode 100644 index a6e0d4cf59e..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/local_gemv.hpp +++ /dev/null @@ -1,100 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -#ifdef USE_MKL -#include -#endif - -namespace experimental::dr::shp { - -namespace __detail { - -template - requires(std::is_same_v, T>) -auto custom_gemv(sycl::queue &q, csr_matrix_view a, Iter b, - Iter c, const std::vector &dependencies = {}) { - std::size_t wg = 32; - - auto event = q.submit([&](auto &&h) { - h.depends_on(dependencies); - h.parallel_for(sycl::nd_range<1>(a.shape()[0] * wg, wg), [=](auto item) { - auto row_index = item.get_group(0); - auto local_id = item.get_local_id(); - auto group_size = item.get_local_range(0); - - auto row = a.row(row_index); - - for (std::size_t idx = local_id; idx < row.size(); idx += group_size) { - auto &&[index, a_v] = row[idx]; - auto &&[i, k] = index; - - auto &&b_v = *(b + k); - auto &&c_v = *(c + i); - - sycl::atomic_ref - c_ref(c_v); - - c_ref += a_v * b_v; - } - }); - }); - return event; -} - -#ifdef USE_MKL - -template - requires(std::is_same_v, T>) -auto mkl_gemv(sycl::queue &q, csr_matrix_view a, Iter b, Iter c, - const std::vector &dependencies = {}) { - - oneapi::mkl::sparse::matrix_handle_t a_handle; - oneapi::mkl::sparse::init_matrix_handle(&a_handle); - - auto rowptr = experimental::dr::shp::__detail::local(a.rowptr_data()); - auto colind = experimental::dr::shp::__detail::local(a.colind_data()); - auto values = experimental::dr::shp::__detail::local(a.values_data()); - - oneapi::mkl::sparse::set_csr_data(q, a_handle, a.shape()[0], a.shape()[1], - oneapi::mkl::index_base::zero, rowptr, - colind, values); - - auto event = - oneapi::mkl::sparse::gemv(q, oneapi::mkl::transpose::nontrans, T(1), - a_handle, b, T(1), c, dependencies); - return event; -} - -template - requires(std::is_same_v, T>) -auto local_gemv(sycl::queue &q, csr_matrix_view a, Iter b, - Iter c, const std::vector &dependencies = {}) { - return mkl_gemv(q, a, b, c, dependencies); -} - -#else - -template - requires(std::is_same_v, T>) -auto local_gemv(sycl::queue &q, csr_matrix_view a, Iter b, - Iter c, const std::vector &dependencies = {}) { - return custom_gemv(q, a, b, c, dependencies); -} - -#endif - -} // namespace __detail - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp deleted file mode 100644 index 36182acf517..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/algorithms/matrix/matrix_algorithms.hpp +++ /dev/null @@ -1,8 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp deleted file mode 100644 index d77caded1ff..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/distributed_dense_matrix.hpp +++ /dev/null @@ -1,332 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace experimental::dr::shp { - -template class distributed_dense_matrix_accessor { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_value_type = rng::range_value_t; - using scalar_reference = rng::range_reference_t; - - using value_type = experimental::dr::shp::matrix_entry; - - using reference = experimental::dr::shp::matrix_ref; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = distributed_dense_matrix_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - using tile_type = L; - - using key_type = experimental::dr::index<>; - - constexpr distributed_dense_matrix_accessor() noexcept = default; - constexpr ~distributed_dense_matrix_accessor() noexcept = default; - constexpr distributed_dense_matrix_accessor( - const distributed_dense_matrix_accessor &) noexcept = default; - constexpr distributed_dense_matrix_accessor & - operator=(const distributed_dense_matrix_accessor &) noexcept = default; - - constexpr distributed_dense_matrix_accessor( - std::span tiles, key_type grid_idx, key_type tile_idx, - key_type grid_shape, key_type tile_shape, key_type matrix_shape) noexcept - : grid_idx_(grid_idx), tile_idx_(tile_idx), grid_shape_(grid_shape), - tile_shape_(tile_shape), matrix_shape_(matrix_shape), tiles_(tiles) {} - - constexpr distributed_dense_matrix_accessor & - operator+=(difference_type offset) noexcept { - std::size_t new_global_idx_ = get_global_idx_() + offset; - key_type new_global_idx = {new_global_idx_ / matrix_shape_[1], - new_global_idx_ % matrix_shape_[1]}; - key_type new_grid_idx = {new_global_idx[0] / tile_shape_[0], - new_global_idx[1] / tile_shape_[1]}; - - key_type new_tile_idx = {new_global_idx[0] % tile_shape_[0], - new_global_idx[1] % tile_shape_[1]}; - - grid_idx_ = new_grid_idx; - tile_idx_ = new_tile_idx; - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return grid_idx_ == other.grid_idx_ && tile_idx_ == other.tile_idx_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(get_global_idx_()) - other.get_global_idx_(); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - if (get_grid_idx() < other.get_grid_idx()) { - return true; - } else if (get_grid_idx() == other.get_grid_idx()) { - return get_local_idx() < other.get_local_idx(); - } else { - return false; - } - } - - constexpr reference operator*() const noexcept { - auto &&tile = tiles_[get_grid_idx()]; - auto &&value = tile[get_local_idx()]; - key_type idx = {tile_idx_[0] + grid_idx_[0] * tile_shape_[0], - tile_idx_[1] + grid_idx_[1] * tile_shape_[1]}; - return reference(idx, value); - } - -private: - size_type get_global_idx_() const noexcept { - auto gidx = get_global_idx(); - return gidx[0] * matrix_shape_[1] + gidx[1]; - } - - key_type get_global_idx() const noexcept { - return {grid_idx_[0] * tile_shape_[0] + tile_idx_[0], - grid_idx_[1] * tile_shape_[1] + tile_idx_[1]}; - } - - size_type get_grid_idx() const noexcept { - return grid_idx_[0] * grid_shape_[1] + grid_idx_[1]; - } - - size_type get_local_idx() const noexcept { - return tile_idx_[0] * tile_shape_[1] + tile_idx_[1]; - } - - size_type get_tile_size() const noexcept { - return tile_shape_[0] * tile_shape_[1]; - } - -private: - key_type grid_idx_; - key_type tile_idx_; - - key_type grid_shape_; - key_type tile_shape_; - key_type matrix_shape_; - - std::span tiles_; -}; - -template -using distributed_dense_matrix_iterator = - experimental::dr::iterator_adaptor>; - -template class distributed_dense_matrix { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using value_type = experimental::dr::shp::matrix_entry; - - using scalar_reference = rng::range_reference_t< - experimental::dr::shp::device_vector>>; - using const_scalar_reference = rng::range_reference_t< - const experimental::dr::shp::device_vector>>; - - using reference = experimental::dr::shp::matrix_ref; - using const_reference = experimental::dr::shp::matrix_ref; - - using key_type = experimental::dr::index<>; - - using iterator = distributed_dense_matrix_iterator< - T, experimental::dr::shp::device_vector>>; - - distributed_dense_matrix(key_type shape) - : shape_(shape), partition_(new experimental::dr::shp::block_cyclic()) { - init_(); - } - - distributed_dense_matrix(key_type shape, const matrix_partition &partition) - : shape_(shape), partition_(partition.clone()) { - init_(); - } - - size_type size() const noexcept { return shape()[0] * shape()[1]; } - - key_type shape() const noexcept { return shape_; } - - scalar_reference operator[](key_type index) { - std::size_t tile_i = index[0] / tile_shape_[0]; - std::size_t tile_j = index[1] / tile_shape_[1]; - - std::size_t local_i = index[0] % tile_shape_[0]; - std::size_t local_j = index[1] % tile_shape_[1]; - - auto &&tile = tiles_[tile_i * grid_shape_[1] + tile_j]; - - return tile[local_i * tile_shape_[1] + local_j]; - } - - const_scalar_reference operator[](key_type index) const { - std::size_t tile_i = index[0] / tile_shape_[0]; - std::size_t tile_j = index[1] / tile_shape_[1]; - - std::size_t local_i = index[0] % tile_shape_[0]; - std::size_t local_j = index[1] % tile_shape_[1]; - - auto &&tile = tiles_[tile_i * grid_shape_[1] + tile_j]; - - return tile[local_i * tile_shape_[1] + local_j]; - } - - iterator begin() { - return iterator(tiles_, key_type({0, 0}), key_type({0, 0}), grid_shape_, - tile_shape_, shape_); - } - - iterator end() { return begin() + shape()[0] * shape()[1]; } - - key_type tile_shape() const noexcept { return tile_shape_; } - - key_type grid_shape() const noexcept { return grid_shape_; } - - auto tile(key_type tile_index) { - auto &&[i, j] = tile_index; - auto iter = tiles_[i * grid_shape()[1] + j].begin(); - - std::size_t tm = - std::min(tile_shape()[0], shape()[0] - i * tile_shape()[0]); - std::size_t tn = - std::min(tile_shape()[1], shape()[1] - j * tile_shape()[1]); - - return dense_matrix_view>>>( - iter, key_type{tm, tn}, tile_shape()[1], - tiles_[i * grid_shape()[1] + j].rank()); - } - - std::vector>>>> - tiles() { - std::vector>>>> - views_; - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - auto iter = tiles_[i * grid_shape_[1] + j].begin(); - - std::size_t tm = - std::min(tile_shape_[0], shape()[0] - i * tile_shape_[0]); - std::size_t tn = - std::min(tile_shape_[1], shape()[1] - j * tile_shape_[1]); - - views_.emplace_back(iter, key_type{tm, tn}, tile_shape_[1], - tiles_[i * grid_shape_[1] + j].rank()); - } - } - return views_; - } - - template > - auto get_tile(key_type tile_index, const Allocator &alloc = Allocator{}) { - std::size_t nrows = get_tile_shape_(tile_index)[0]; - std::size_t ld = tile_shape_[1]; - std::size_t tile_size = nrows * ld; - dense_matrix local_tile(get_tile_shape_(tile_index), ld, - alloc); - auto remote_tile = tile(tile_index); - shp::copy(remote_tile.data(), remote_tile.data() + tile_size, - local_tile.data()); - return local_tile; - } - - template > - auto get_tile_async(key_type tile_index, - const Allocator &alloc = Allocator{}) { - std::size_t nrows = get_tile_shape_(tile_index)[0]; - std::size_t ld = tile_shape_[1]; - std::size_t tile_size = nrows * ld; - dense_matrix local_tile(get_tile_shape_(tile_index), ld, - alloc); - auto remote_tile = tile(tile_index); - auto event = shp::copy_async( - remote_tile.data(), remote_tile.data() + tile_size, local_tile.data()); - return future(std::move(local_tile), {event}); - } - - auto segments() { - std::vector>>>> - views_; - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - auto iter = tiles_[i * grid_shape_[1] + j].begin(); - - std::size_t tm = - std::min(tile_shape_[0], shape()[0] - i * tile_shape_[0]); - std::size_t tn = - std::min(tile_shape_[1], shape()[1] - j * tile_shape_[1]); - - std::size_t m_offset = i * tile_shape_[0]; - std::size_t n_offset = j * tile_shape_[1]; - - views_.emplace_back(iter, key_type{tm, tn}, - key_type{m_offset, n_offset}, tile_shape_[1], - tiles_[i * grid_shape_[1] + j].rank()); - } - } - return experimental::dr::__detail::owning_view(std::move(views_)); - } - -private: - void init_() { - grid_shape_ = partition_->grid_shape(shape()); - tile_shape_ = partition_->tile_shape(shape()); - - tiles_.reserve(grid_shape_[0] * grid_shape_[1]); - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - std::size_t rank = partition_->tile_rank(shape(), {i, j}); - - auto device = experimental::dr::shp::devices()[rank]; - experimental::dr::shp::device_allocator alloc(experimental::dr::shp::context(), device); - - std::size_t tile_size = tile_shape_[0] * tile_shape_[1]; - - tiles_.emplace_back(tile_size, alloc, rank); - } - } - } - - key_type get_tile_shape_(key_type tile_index) { - auto &&[i, j] = tile_index; - std::size_t tm = std::min(tile_shape_[0], shape()[0] - i * tile_shape_[0]); - std::size_t tn = std::min(tile_shape_[1], shape()[1] - j * tile_shape_[1]); - return key_type{tm, tn}; - } - -private: - key_type shape_; - key_type grid_shape_; - key_type tile_shape_; - std::unique_ptr partition_; - - std::vector>> tiles_; -}; - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp deleted file mode 100644 index 538dd09c172..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_entry.hpp +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -#include - -namespace experimental::dr::shp { - -template class matrix_entry { -public: - using index_type = I; - using map_type = T; - - matrix_entry(experimental::dr::index index, const map_type &value) - : index_(index), value_(value) {} - matrix_entry(experimental::dr::index index, map_type &&value) - : index_(index), value_(std::move(value)) {} - - template - requires(std::is_constructible_v) - matrix_entry(experimental::dr::index index, U &&value) - : index_(index), value_(std::forward(value)) {} - - template - matrix_entry(Entry &&entry) - : index_(std::get<0>(entry)), value_(std::get<1>(entry)) {} - - template auto get() const noexcept { - if constexpr (Index == 0) { - return index(); - } - if constexpr (Index == 1) { - return value(); - } - } - - operator std::pair, T>() const noexcept { - return {{index_[0], index_[1]}, value_}; - } - - experimental::dr::index index() const noexcept { return index_; } - - map_type value() const noexcept { return value_; } - - template - requires(!std::is_same_v && - std::numeric_limits::max() >= std::numeric_limits::max()) - operator matrix_entry() const noexcept { - return matrix_entry(index_, value_); - } - - template - requires(!std::is_const_v && !std::is_same_v && - std::numeric_limits::max() >= std::numeric_limits::max()) - operator matrix_entry, U>() const noexcept { - return matrix_entry, U>(index_, value_); - } - - bool operator<(const matrix_entry &other) const noexcept { - if (index()[0] < other.index()[0]) { - return true; - } else if (index()[0] == other.index()[0] && - index()[1] < other.index()[1]) { - return true; - } - return false; - } - - matrix_entry() = default; - ~matrix_entry() = default; - - matrix_entry(const matrix_entry &) = default; - matrix_entry(matrix_entry &&) = default; - matrix_entry &operator=(const matrix_entry &) = default; - matrix_entry &operator=(matrix_entry &&) = default; - -private: - experimental::dr::index index_; - map_type value_; -}; - -} // namespace experimental::dr::shp - -namespace std { - -template - requires(!std::is_const_v) -void swap(experimental::dr::shp::matrix_entry a, experimental::dr::shp::matrix_entry b) { - experimental::dr::shp::matrix_entry other = a; - a = b; - b = other; -} - -template -struct tuple_element> - : tuple_element, T>> {}; - -template -struct tuple_size> : integral_constant { -}; - -} // namespace std - -namespace experimental::dr::shp { - -template -class matrix_ref { -public: - using scalar_type = T; - using index_type = I; - - using key_type = experimental::dr::index; - using map_type = T; - - using scalar_reference = TRef; - - using value_type = experimental::dr::shp::matrix_entry; - - matrix_ref(experimental::dr::index index, scalar_reference value) - : index_(index), value_(value) {} - - operator value_type() const noexcept { return value_type(index_, value_); } - - operator std::pair, T>() const noexcept { - return {{index_[0], index_[1]}, value_}; - } - - template - decltype(auto) get() const noexcept - requires(Index <= 1) - { - if constexpr (Index == 0) { - return index(); - } - if constexpr (Index == 1) { - return value(); - } - } - - experimental::dr::index index() const noexcept { return index_; } - - scalar_reference value() const noexcept { return value_; } - - template - requires(!std::is_same_v && - std::numeric_limits::max() >= std::numeric_limits::max()) - operator matrix_ref() const noexcept { - return matrix_ref(index_, value_); - } - - template - requires(!std::is_const_v && !std::is_same_v && - std::numeric_limits::max() >= std::numeric_limits::max()) - operator matrix_ref, U, TRef>() const noexcept { - return matrix_ref, U, TRef>(index_, value_); - } - - bool operator<(matrix_entry other) const noexcept { - if (index()[0] < other.index()[0]) { - return true; - } else if (index()[0] == other.index()[0] && - index()[1] < other.index()[1]) { - return true; - } - return false; - } - - matrix_ref() = delete; - ~matrix_ref() = default; - - matrix_ref(const matrix_ref &) = default; - matrix_ref &operator=(const matrix_ref &) = delete; - matrix_ref(matrix_ref &&) = default; - matrix_ref &operator=(matrix_ref &&) = default; - -private: - experimental::dr::index index_; - scalar_reference value_; -}; - -} // namespace experimental::dr::shp - -namespace std { - -template - requires(!std::is_const_v) -void swap(experimental::dr::shp::matrix_ref a, - experimental::dr::shp::matrix_ref b) { - experimental::dr::shp::matrix_entry other = a; - a = b; - b = other; -} - -template -struct tuple_element> - : tuple_element, TRef>> {}; - -template -struct tuple_size> - : integral_constant {}; - -template -inline decltype(auto) get(experimental::dr::shp::matrix_ref ref) - requires(Index <= 1) -{ - if constexpr (Index == 0) { - return ref.index(); - } - if constexpr (Index == 1) { - return ref.value(); - } -} - -template -inline decltype(auto) get(experimental::dr::shp::matrix_entry entry) - requires(Index <= 1) -{ - if constexpr (Index == 0) { - return entry.index(); - } - if constexpr (Index == 1) { - return entry.value(); - } -} - -} // namespace std diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp deleted file mode 100644 index fa38ce43b1c..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/matrix_partition.hpp +++ /dev/null @@ -1,115 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -namespace experimental::dr::shp { - -namespace tile { - -// Special constant to indicate tile dimensions of -// {ceil(m / p_m), ceil(n / p_n)} should be chosen -// in order to evenly divide a dimension amongst the -// ranks in the processor grid. -inline constexpr std::size_t div = std::numeric_limits::max(); - -} // namespace tile - -class matrix_partition { -public: - virtual std::size_t tile_rank(experimental::dr::index<> matrix_shape, - experimental::dr::index<> tile_id) const = 0; - virtual experimental::dr::index<> grid_shape(experimental::dr::index<> matrix_shape) const = 0; - virtual experimental::dr::index<> tile_shape(experimental::dr::index<> matrix_shape) const = 0; - - virtual std::unique_ptr clone() const = 0; - virtual ~matrix_partition(){}; -}; - -class block_cyclic final : public matrix_partition { -public: - block_cyclic(experimental::dr::index<> tile_shape = {experimental::dr::shp::tile::div, - experimental::dr::shp::tile::div}, - experimental::dr::index<> grid_shape = detail::factor(experimental::dr::shp::nprocs())) - : tile_shape_(tile_shape), grid_shape_(grid_shape) {} - - block_cyclic(const block_cyclic &) noexcept = default; - - experimental::dr::index<> tile_shape() const { return tile_shape_; } - - std::size_t tile_rank(experimental::dr::index<> matrix_shape, experimental::dr::index<> tile_id) const { - experimental::dr::index<> pgrid_idx = {tile_id[0] % grid_shape_[0], - tile_id[1] % grid_shape_[1]}; - - auto pgrid = processor_grid_(); - - return pgrid[pgrid_idx[0] * grid_shape_[1] + pgrid_idx[1]]; - } - - experimental::dr::index<> grid_shape(experimental::dr::index<> matrix_shape) const { - auto ts = this->tile_shape(matrix_shape); - - return experimental::dr::index<>((matrix_shape[0] + ts[0] - 1) / ts[0], - (matrix_shape[1] + ts[1] - 1) / ts[1]); - } - - experimental::dr::index<> tile_shape(experimental::dr::index<> matrix_shape) const { - std::array tshape = {tile_shape_[0], tile_shape_[1]}; - - constexpr std::size_t ndims = 2; - for (std::size_t i = 0; i < ndims; i++) { - if (tshape[i] == experimental::dr::shp::tile::div) { - tshape[i] = (matrix_shape[i] + grid_shape_[i] - 1) / grid_shape_[i]; - } - } - - return tshape; - } - - std::unique_ptr clone() const noexcept { - return std::unique_ptr(new block_cyclic(*this)); - } - -private: - std::vector processor_grid_() const { - std::vector grid(grid_shape_[0] * grid_shape_[1]); - - for (std::size_t i = 0; i < grid.size(); i++) { - grid[i] = i; - } - return grid; - } - - experimental::dr::index<> tile_shape_; - experimental::dr::index<> grid_shape_; -}; // namespace experimental::dr::shp - -inline std::vector partition_matmul(std::size_t m, std::size_t n, - std::size_t k) { - experimental::dr::index<> c_pgrid = detail::factor(shp::nprocs()); - - block_cyclic c_block({experimental::dr::shp::tile::div, experimental::dr::shp::tile::div}, - {c_pgrid[0], c_pgrid[1]}); - - std::size_t k_block; - - if (m * k >= k * n) { - k_block = (shp::nprocs() + c_pgrid[0] - 1) / c_pgrid[0]; - } else { - k_block = (shp::nprocs() + c_pgrid[1] - 1) / c_pgrid[1]; - } - - block_cyclic a_block({experimental::dr::shp::tile::div, experimental::dr::shp::tile::div}, - {c_pgrid[0], k_block}); - block_cyclic b_block({experimental::dr::shp::tile::div, experimental::dr::shp::tile::div}, - {k_block, c_pgrid[1]}); - - return {a_block, b_block, c_block}; -} - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp deleted file mode 100644 index 1a08e3ecbd3..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sequential/dense_matrix.hpp +++ /dev/null @@ -1,143 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include - -namespace experimental::dr::shp { - -template > -class dense_matrix { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using allocator_type = Allocator; - - using scalar_pointer = typename std::allocator_traits::pointer; - - using scalar_reference = std::iter_reference_t; - using reference = experimental::dr::shp::matrix_ref; - - using key_type = experimental::dr::index<>; - using map_type = T; - - using iterator = dense_matrix_iterator; - - dense_matrix(key_type shape) - : allocator_(Allocator()), shape_(shape), ld_(shape[1]) { - data_ = allocator_.allocate(shape_[0] * shape_[1]); - } - - dense_matrix(key_type shape, std::size_t ld) - requires(std::is_default_constructible_v) - : allocator_(Allocator()), shape_(shape), ld_(ld) { - data_ = allocator_.allocate(shape_[0] * ld_); - } - - dense_matrix(key_type shape, std::size_t ld, const Allocator &alloc) - : allocator_(alloc), shape_(shape), ld_(ld) { - data_ = allocator_.allocate(shape_[0] * ld_); - } - - dense_matrix(dense_matrix &&other) - : allocator_(other.allocator_), data_(other.data_), shape_(other.shape_), - ld_(other.ld_) { - other.null_data_(); - } - - dense_matrix &operator=(dense_matrix &&other) { - deallocate_storage_(); - allocator_ = other.allocator_; - data_ = other.data_; - shape_ = other.shape_; - ld_ = other.ld_; - - other.null_data_(); - } - - dense_matrix(const dense_matrix &other) = delete; - dense_matrix &operator=(const dense_matrix &other) = delete; - - ~dense_matrix() { deallocate_storage_(); } - - key_type shape() const noexcept { return shape_; } - - size_type size() const noexcept { return shape()[0] * shape()[1]; } - - scalar_reference operator[](key_type idx) const { - return data_[idx[0] * ld_ + idx[1]]; - } - - iterator begin() const { - return iterator(data_, key_type{0, 0}, shape_, ld_); - } - - iterator end() const { - return iterator(data_, key_type{shape_[0], 0}, shape_, ld_); - } - - auto row(size_type row_index) const { - // return dense_matrix_row_view(data_ + row_index * ld_, row_index, - // shape()[1]); - auto row_elements = rng::views::iota(size_type(0), size_type(shape()[1])); - scalar_pointer data = data_ + row_index * ld_; - - return row_elements | rng::views::transform([=](auto column_index) { - return reference(key_type(row_index, column_index), - data[column_index]); - }); - } - - auto column(size_type column_index) const { - // return dense_matrix_column_view(data_ + column_index, column_index, - // shape()[0], ld_); - auto column_elements = - rng::views::iota(size_type(0), size_type(shape()[0])); - scalar_pointer data = data_ + column_index; - size_type ld = ld_; - - return column_elements | rng::views::transform([=](auto row_index) { - return reference(key_type(row_index, column_index), - data[row_index * ld]); - }); - } - - scalar_pointer data() const { return data_; } - - size_type ld() const { return ld_; } - - /* - auto local() const { - } - */ - -private: - void deallocate_storage_() { - if (data_ != nullptr) { - allocator_.deallocate(data_, shape_[0] * ld_); - } - } - - void null_data_() { - data_ = nullptr; - shape_ = {0, 0}; - ld_ = 0; - } - - allocator_type allocator_; - scalar_pointer data_; - key_type shape_; - size_type ld_; -}; - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp deleted file mode 100644 index a96f4555256..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/containers/sparse_matrix.hpp +++ /dev/null @@ -1,413 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace experimental::dr::shp { - -template - requires(rng::viewable_range) -class distributed_range_accessor { -public: - using segment_type = rng::range_value_t; - - using value_type = rng::range_value_t; - - using size_type = rng::range_size_t; - using difference_type = rng::range_difference_t; - - using reference = rng::range_reference_t; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = distributed_range_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - constexpr distributed_range_accessor() noexcept = default; - constexpr ~distributed_range_accessor() noexcept = default; - constexpr distributed_range_accessor( - const distributed_range_accessor &) noexcept = default; - constexpr distributed_range_accessor & - operator=(const distributed_range_accessor &) noexcept = default; - - constexpr distributed_range_accessor(Segments segments, size_type segment_id, - size_type idx) noexcept - : segments_(rng::views::all(std::forward(segments))), - segment_id_(segment_id), idx_(idx) {} - - constexpr distributed_range_accessor & - operator+=(difference_type offset) noexcept { - - while (offset > 0) { - difference_type current_offset = std::min( - offset, - difference_type(rng::size(*(segments_.begin() + segment_id_))) - - difference_type(idx_)); - idx_ += current_offset; - offset -= current_offset; - - if (idx_ >= rng::size((*(segments_.begin() + segment_id_)))) { - segment_id_++; - idx_ = 0; - } - } - - while (offset < 0) { - difference_type current_offset = - std::min(-offset, difference_type(idx_) + 1); - - difference_type new_idx = difference_type(idx_) - current_offset; - - if (new_idx < 0) { - segment_id_--; - new_idx = rng::size(*(segments_.begin() + segment_id_)) - 1; - } - - idx_ = new_idx; - } - - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return segment_id_ == other.segment_id_ && idx_ == other.idx_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(get_global_idx()) - other.get_global_idx(); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - if (segment_id_ < other.segment_id_) { - return true; - } else if (segment_id_ == other.segment_id_) { - return idx_ < other.idx_; - } else { - return false; - } - } - - constexpr reference operator*() const noexcept { - return *((*(segments_.begin() + segment_id_)).begin() + idx_); - } - -private: - size_type get_global_idx() const noexcept { - size_type cumulative_size = 0; - for (std::size_t i = 0; i < segment_id_; i++) { - cumulative_size += segments_[i].size(); - } - return cumulative_size + idx_; - } - - rng::views::all_t segments_; - size_type segment_id_ = 0; - size_type idx_ = 0; -}; - -template -using distributed_sparse_matrix_iterator = - experimental::dr::iterator_adaptor>; - -template class sparse_matrix { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using value_type = experimental::dr::shp::matrix_entry; - - using scalar_reference = rng::range_reference_t< - experimental::dr::shp::device_vector>>; - using const_scalar_reference = rng::range_reference_t< - const experimental::dr::shp::device_vector>>; - - using reference = experimental::dr::shp::matrix_ref; - using const_reference = experimental::dr::shp::matrix_ref; - - using key_type = experimental::dr::index; - - using segment_type = experimental::dr::shp::csr_matrix_view< - T, I, - rng::iterator_t>>, - rng::iterator_t>>>; - - // using iterator = sparse_matrix_iterator>>; - using iterator = - distributed_sparse_matrix_iterator &&>; - - sparse_matrix(key_type shape) - : shape_(shape), partition_(new experimental::dr::shp::block_cyclic()) { - init_(); - } - - sparse_matrix(key_type shape, double density) - : shape_(shape), partition_(new experimental::dr::shp::block_cyclic()) { - init_random_(density); - } - - sparse_matrix(key_type shape, double density, - const matrix_partition &partition) - : shape_(shape), partition_(partition.clone()) { - init_random_(density); - } - - sparse_matrix(key_type shape, const matrix_partition &partition) - : shape_(shape), partition_(partition.clone()) { - init_(); - } - - size_type size() const noexcept { return total_nnz_; } - - key_type shape() const noexcept { return shape_; } - - iterator begin() { return iterator(segments(), 0, 0); } - - iterator end() { - return iterator(segments(), grid_shape_[0] * grid_shape_[1], 0); - } - - segment_type tile(key_type tile_index) { - std::size_t tile_idx = tile_index[0] * grid_shape_[1] + tile_index[1]; - auto values = values_[tile_idx].begin(); - auto rowptr = rowptr_[tile_idx].begin(); - auto colind = colind_[tile_idx].begin(); - auto nnz = nnz_[tile_idx]; - - std::size_t tm = - std::min(tile_shape_[0], shape()[0] - tile_index[0] * tile_shape_[0]); - std::size_t tn = - std::min(tile_shape_[1], shape()[1] - tile_index[1] * tile_shape_[1]); - - return segment_type(values, rowptr, colind, key_type(tm, tn), nnz, - values_[tile_idx].rank()); - } - - // Note: this function is currently *not* asynchronous due to a deadlock - // in `gemv_benchmark`. I believe this is a SYCL bug. - template - auto copy_tile_async(key_type tile_index, - csr_matrix_view tile_view) { - std::size_t tile_idx = tile_index[0] * grid_shape_[1] + tile_index[1]; - auto &&values = values_[tile_idx]; - auto &&colind = colind_[tile_idx]; - auto &&rowptr = rowptr_[tile_idx]; - auto &&nnz = nnz_[tile_idx]; - - total_nnz_ -= nnz; - nnz = tile_view.size(); - - total_nnz_ += tile_view.size(); - - values.resize(tile_view.size()); - colind.resize(tile_view.size()); - rowptr.resize(tile_view.shape()[0] + 1); - - auto v_e = experimental::dr::shp::copy_async(tile_view.values_data(), - tile_view.values_data() + values.size(), - values.data()); - - auto c_e = experimental::dr::shp::copy_async(tile_view.colind_data(), - tile_view.colind_data() + colind.size(), - colind.data()); - - auto r_e = experimental::dr::shp::copy_async(tile_view.rowptr_data(), - tile_view.rowptr_data() + rowptr.size(), - rowptr.data()); - - tiles_ = generate_tiles_(); - segments_ = generate_segments_(); - - v_e.wait(); - c_e.wait(); - r_e.wait(); - - return __detail::combine_events({v_e, c_e, r_e}); - } - - template - void copy_tile(key_type tile_index, - csr_matrix_view tile_view) { - copy_tile_async(tile_index, tile_view).wait(); - } - - key_type tile_shape() const noexcept { return tile_shape_; } - - key_type grid_shape() const noexcept { return grid_shape_; } - - std::span tiles() { return std::span(tiles_); } - - std::span segments() { return std::span(segments_); } - -private: - std::vector generate_tiles_() { - std::vector views_; - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - std::size_t tm = std::min(tile_shape_[0], - shape()[0] - i * tile_shape_[0]); - std::size_t tn = std::min(tile_shape_[1], - shape()[1] - j * tile_shape_[1]); - - std::size_t tile_idx = i * grid_shape_[1] + j; - - auto values = values_[tile_idx].begin(); - auto rowptr = rowptr_[tile_idx].begin(); - auto colind = colind_[tile_idx].begin(); - auto nnz = nnz_[tile_idx]; - - views_.emplace_back(values, rowptr, colind, key_type(tm, tn), nnz, - values_[tile_idx].rank()); - } - } - return views_; - } - - std::vector generate_segments_() { - std::vector views_; - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - std::size_t tm = std::min(tile_shape_[0], - shape()[0] - i * tile_shape_[0]); - std::size_t tn = std::min(tile_shape_[1], - shape()[1] - j * tile_shape_[1]); - - std::size_t tile_idx = i * grid_shape_[1] + j; - - auto values = values_[tile_idx].begin(); - auto rowptr = rowptr_[tile_idx].begin(); - auto colind = colind_[tile_idx].begin(); - auto nnz = nnz_[tile_idx]; - - std::size_t m_offset = i * tile_shape_[0]; - std::size_t n_offset = j * tile_shape_[1]; - - views_.emplace_back(values, rowptr, colind, key_type(tm, tn), nnz, - values_[i * grid_shape_[1] + j].rank(), - key_type(m_offset, n_offset)); - } - } - return views_; - } - -private: - void init_() { - grid_shape_ = key_type(partition_->grid_shape(shape())); - tile_shape_ = key_type(partition_->tile_shape(shape())); - - values_.reserve(grid_shape_[0] * grid_shape_[1]); - rowptr_.reserve(grid_shape_[0] * grid_shape_[1]); - colind_.reserve(grid_shape_[0] * grid_shape_[1]); - nnz_.reserve(grid_shape_[0] * grid_shape_[1]); - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - std::size_t rank = partition_->tile_rank(shape(), {i, j}); - - auto device = experimental::dr::shp::devices()[rank]; - experimental::dr::shp::device_allocator alloc(experimental::dr::shp::context(), device); - experimental::dr::shp::device_allocator i_alloc(experimental::dr::shp::context(), device); - - values_.emplace_back(1, alloc, rank); - rowptr_.emplace_back(2, i_alloc, rank); - colind_.emplace_back(1, i_alloc, rank); - nnz_.push_back(0); - rowptr_.back()[0] = 0; - rowptr_.back()[1] = 0; - } - } - tiles_ = generate_tiles_(); - segments_ = generate_segments_(); - } - - void init_random_(double density) { - grid_shape_ = key_type(partition_->grid_shape(shape())); - tile_shape_ = key_type(partition_->tile_shape(shape())); - - values_.reserve(grid_shape_[0] * grid_shape_[1]); - rowptr_.reserve(grid_shape_[0] * grid_shape_[1]); - colind_.reserve(grid_shape_[0] * grid_shape_[1]); - nnz_.reserve(grid_shape_[0] * grid_shape_[1]); - - for (std::size_t i = 0; i < grid_shape_[0]; i++) { - for (std::size_t j = 0; j < grid_shape_[1]; j++) { - std::size_t rank = partition_->tile_rank(shape(), {i, j}); - - std::size_t tm = std::min(tile_shape_[0], - shape()[0] - i * tile_shape_[0]); - std::size_t tn = std::min(tile_shape_[1], - shape()[1] - j * tile_shape_[1]); - - auto device = experimental::dr::shp::devices()[rank]; - experimental::dr::shp::device_allocator alloc(experimental::dr::shp::context(), device); - experimental::dr::shp::device_allocator i_alloc(experimental::dr::shp::context(), device); - - auto seed = i * grid_shape_[1] + j; - - auto csr = generate_random_csr(key_type(tm, tn), density, seed); - std::size_t nnz = csr.size(); - - experimental::dr::shp::device_vector> values( - csr.size(), alloc, rank); - experimental::dr::shp::device_vector> rowptr( - tm + 1, i_alloc, rank); - - experimental::dr::shp::device_vector> colind( - csr.size(), i_alloc, rank); - - experimental::dr::shp::copy(csr.values_data(), csr.values_data() + csr.size(), - values.data()); - experimental::dr::shp::copy(csr.rowptr_data(), csr.rowptr_data() + tm + 1, - rowptr.data()); - experimental::dr::shp::copy(csr.colind_data(), csr.colind_data() + csr.size(), - colind.data()); - - values_.push_back(std::move(values)); - rowptr_.emplace_back(std::move(rowptr)); - colind_.emplace_back(std::move(colind)); - nnz_.push_back(nnz); - total_nnz_ += nnz; - - delete[] csr.values_data(); - delete[] csr.rowptr_data(); - delete[] csr.colind_data(); - } - } - tiles_ = generate_tiles_(); - segments_ = generate_segments_(); - } - -private: - key_type shape_; - key_type grid_shape_; - key_type tile_shape_; - std::unique_ptr partition_; - - std::vector>> values_; - std::vector>> rowptr_; - std::vector>> colind_; - - std::vector nnz_; - std::size_t total_nnz_ = 0; - - std::vector tiles_; - std::vector segments_; -}; - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp index ffbcb323df6..6750c0a1d79 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util.hpp @@ -177,30 +177,31 @@ template void print_range(Range &&r, std::string label = "") { std::cout << "]" << std::endl; } -template -void print_matrix(Matrix &&m, std::string label = "") { - std::cout << m.shape()[0] << " x " << m.shape()[1] << " matrix with " - << m.size() << " stored values"; - if (label != "") { - std::cout << " \"" << label << "\""; - } - std::cout << std::endl; - - for (auto &&tuple : m) { - auto &&[index, value] = tuple; - auto &&[i, j] = index; - - std::cout << "(" << i << ", " << j << "): " << value << std::endl; - } -} +// template +// void print_matrix(Matrix &&m, std::string label = "") { +// std::cout << m.shape()[0] << " x " << m.shape()[1] << " matrix with " +// << m.size() << " stored values"; +// if (label != "") { +// std::cout << " \"" << label << "\""; +// } +// std::cout << std::endl; + +// for (auto &&tuple : m) { +// auto &&[index, value] = tuple; +// auto &&[i, j] = index; + +// std::cout << "(" << i << ", " << j << "): " << value << std::endl; +// } +// } template void print_range_details(R &&r, std::string label = "") { if (label != "") { std::cout << "\"" << label << "\" "; } - std::cout << "distributed range with " << rng::size(experimental::dr::ranges::segments(r)) - << " segments." << std::endl; + std::cout << "distributed range with " + << rng::size(experimental::dr::ranges::segments(r)) << " segments." + << std::endl; std::size_t idx = 0; for (auto &&segment : experimental::dr::ranges::segments(r)) { @@ -213,8 +214,8 @@ template void range_details(R &&r, std::size_t width = 80) { std::size_t size = rng::size(r); - for (auto &&[idx, segment] : - experimental::dr::__detail::enumerate(experimental::dr::ranges::segments(r))) { + for (auto &&[idx, segment] : experimental::dr::__detail::enumerate( + experimental::dr::ranges::segments(r))) { std::size_t local_size = rng::size(segment); double percent = double(local_size) / size; @@ -228,8 +229,8 @@ void range_details(R &&r, std::size_t width = 80) { std::size_t after_whitespace = whitespace - initial_whitespace; std::cout << "[" << std::string(initial_whitespace, ' ') - << experimental::dr::ranges::rank(segment) << std::string(after_whitespace, ' ') - << "]"; + << experimental::dr::ranges::rank(segment) + << std::string(after_whitespace, ' ') << "]"; } std::cout << std::endl; } diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp deleted file mode 100644 index 24f0dc822f6..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/coo_matrix.hpp +++ /dev/null @@ -1,170 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -namespace experimental::dr::shp { - -namespace __detail { - -template > -class coo_matrix { -public: - using value_type = experimental::dr::shp::matrix_entry; - using scalar_type = T; - using index_type = I; - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using allocator_type = Allocator; - - using key_type = experimental::dr::index; - using map_type = T; - - using backend_allocator_type = typename std::allocator_traits< - allocator_type>::template rebind_alloc; - using backend_type = std::vector; - - using iterator = typename backend_type::iterator; - using const_iterator = typename backend_type::const_iterator; - - using reference = experimental::dr::shp::matrix_ref; - using const_reference = experimental::dr::shp::matrix_ref, I>; - - using scalar_reference = T &; - - coo_matrix(experimental::dr::index shape) : shape_(shape) {} - - experimental::dr::index shape() const noexcept { return shape_; } - - size_type size() const noexcept { return tuples_.size(); } - - void reserve(size_type new_cap) { tuples_.reserve(new_cap); } - - iterator begin() noexcept { return tuples_.begin(); } - - const_iterator begin() const noexcept { return tuples_.begin(); } - - iterator end() noexcept { return tuples_.end(); } - - const_iterator end() const noexcept { return tuples_.end(); } - - template void insert(InputIt first, InputIt last) { - for (auto iter = first; iter != last; ++iter) { - insert(*iter); - } - } - - template void push_back(InputIt first, InputIt last) { - for (auto iter = first; iter != last; ++iter) { - push_back(*iter); - } - } - - void push_back(const value_type &value) { tuples_.push_back(value); } - - template void assign_tuples(InputIt first, InputIt last) { - tuples_.assign(first, last); - } - - std::pair insert(value_type &&value) { - auto &&[insert_index, insert_value] = value; - for (auto iter = begin(); iter != end(); ++iter) { - auto &&[index, v] = *iter; - if (index == insert_index) { - return {iter, false}; - } - } - tuples_.push_back(value); - return {--tuples_.end(), true}; - } - - std::pair insert(const value_type &value) { - auto &&[insert_index, insert_value] = value; - for (auto iter = begin(); iter != end(); ++iter) { - auto &&[index, v] = *iter; - if (index == insert_index) { - return {iter, false}; - } - } - tuples_.push_back(value); - return {--tuples_.end(), true}; - } - - template - std::pair insert_or_assign(key_type k, M &&obj) { - for (auto iter = begin(); iter != end(); ++iter) { - auto &&[index, v] = *iter; - if (index == k) { - v = std::forward(obj); - return {iter, false}; - } - } - tuples_.push_back({k, std::forward(obj)}); - return {--tuples_.end(), true}; - } - - iterator find(key_type key) noexcept { - return std::find_if(begin(), end(), [&](auto &&v) { - auto &&[i, v_] = v; - return i == key; - }); - } - - const_iterator find(key_type key) const noexcept { - return std::find_if(begin(), end(), [&](auto &&v) { - auto &&[i, v_] = v; - return i == key; - }); - } - - void reshape(experimental::dr::index shape) { - bool all_inside = true; - for (auto &&[index, v] : *this) { - auto &&[i, j] = index; - if (!(i < shape[0] && j < shape[1])) { - all_inside = false; - break; - } - } - - if (all_inside) { - shape_ = shape; - return; - } else { - coo_matrix new_tuples(shape); - for (auto &&[index, v] : *this) { - auto &&[i, j] = index; - if (i < shape[0] && j < shape[1]) { - new_tuples.insert({index, v}); - } - } - shape_ = shape; - assign_tuples(new_tuples.begin(), new_tuples.end()); - } - } - - coo_matrix() = default; - ~coo_matrix() = default; - coo_matrix(const coo_matrix &) = default; - coo_matrix(coo_matrix &&) = default; - coo_matrix &operator=(const coo_matrix &) = default; - coo_matrix &operator=(coo_matrix &&) = default; - - std::size_t nbytes() const noexcept { - return tuples_.size() * sizeof(value_type); - } - -private: - experimental::dr::index shape_; - backend_type tuples_; -}; - -} // namespace __detail - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp deleted file mode 100644 index e3ebeb099ed..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/generate_random.hpp +++ /dev/null @@ -1,92 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include -#include - -namespace experimental::dr::shp { - -namespace { - -template struct uniform_distribution { - using type = std::uniform_int_distribution; -}; - -template struct uniform_distribution { - using type = std::uniform_real_distribution; -}; - -template -using uniform_distribution_t = typename uniform_distribution::type; - -} // namespace - -template -auto generate_random_csr(experimental::dr::index shape, double density = 0.01, - unsigned int seed = 0) { - - assert(density >= 0.0 && density < 1.0); - - std::map, T> tuples; - - std::size_t nnz = density * shape[0] * shape[1]; - - std::mt19937 gen(seed); - std::uniform_int_distribution row(0, shape[0] - 1); - std::uniform_int_distribution column(0, shape[1] - 1); - - uniform_distribution_t value_gen(0, 1); - - while (tuples.size() < nnz) { - auto i = row(gen); - auto j = column(gen); - if (tuples.find({i, j}) == tuples.end()) { - T value = value_gen(gen); - tuples.insert({{i, j}, value}); - } - } - - T *values = new T[nnz]; - I *rowptr = new I[shape[0] + 1]; - I *colind = new I[nnz]; - - rowptr[0] = 0; - - std::size_t r = 0; - std::size_t c = 0; - for (auto iter = tuples.begin(); iter != tuples.end(); ++iter) { - auto &&[index, value] = *iter; - auto &&[i, j] = index; - - values[c] = value; - colind[c] = j; - - while (r < i) { - if (r + 1 > shape[0]) { - // TODO: exception? - // throw std::runtime_error("csr_matrix_impl_: given invalid matrix"); - } - rowptr[r + 1] = c; - r++; - } - c++; - - if (c > nnz) { - // TODO: exception? - // throw std::runtime_error("csr_matrix_impl_: given invalid matrix"); - } - } - - for (; r < shape[0]; r++) { - rowptr[r + 1] = nnz; - } - - return csr_matrix_view(values, rowptr, colind, shape, nnz, 0); -} - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp deleted file mode 100644 index 14c1e24c6e1..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/util/matrix_io.hpp +++ /dev/null @@ -1,289 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace experimental::dr::shp { - -namespace __detail { - -// Preconditions: -// 1) `tuples` sorted by row, column -// 2) `tuples` has shape `shape` -// 3) `tuples` has `nnz` elements -template -auto convert_to_csr(Tuples &&tuples, experimental::dr::index<> shape, std::size_t nnz, - Allocator &&allocator) { - auto &&[index, v] = *tuples.begin(); - auto &&[i, j] = index; - - using T = std::remove_reference_t; - using I = std::remove_reference_t; - - typename std::allocator_traits::template rebind_alloc - i_allocator(allocator); - - T *values = allocator.allocate(nnz); - I *rowptr = i_allocator.allocate(shape[0] + 1); - I *colind = i_allocator.allocate(nnz); - - rowptr[0] = 0; - - std::size_t r = 0; - std::size_t c = 0; - for (auto iter = tuples.begin(); iter != tuples.end(); ++iter) { - auto &&[index, value] = *iter; - auto &&[i, j] = index; - - values[c] = value; - colind[c] = j; - - while (r < i) { - assert(r + 1 <= shape[0]); - // throw std::runtime_error("csr_matrix_impl_: given invalid matrix"); - rowptr[r + 1] = c; - r++; - } - c++; - - assert(c <= nnz); - // throw std::runtime_error("csr_matrix_impl_: given invalid matrix"); - } - - for (; r < shape[0]; r++) { - rowptr[r + 1] = nnz; - } - - return csr_matrix_view(values, rowptr, colind, - experimental::dr::index(shape[0], shape[1]), nnz, 0); -} - -/// Read in the Matrix Market file at location `file_path` and a return -/// a coo_matrix data structure with its contents. -template -inline coo_matrix mmread(std::string file_path, bool one_indexed = true) { - using size_type = std::size_t; - - std::ifstream f; - - f.open(file_path.c_str()); - - if (!f.is_open()) { - // TODO better choice of exception. - throw std::runtime_error("mmread: cannot open " + file_path); - } - - std::string buf; - - // Make sure the file is matrix market matrix, coordinate, and check whether - // it is symmetric. If the matrix is symmetric, non-diagonal elements will - // be inserted in both (i, j) and (j, i). Error out if skew-symmetric or - // Hermitian. - std::getline(f, buf); - std::istringstream ss(buf); - std::string item; - ss >> item; - if (item != "%%MatrixMarket") { - throw std::runtime_error(file_path + - " could not be parsed as a Matrix Market file."); - } - ss >> item; - if (item != "matrix") { - throw std::runtime_error(file_path + - " could not be parsed as a Matrix Market file."); - } - ss >> item; - if (item != "coordinate") { - throw std::runtime_error(file_path + - " could not be parsed as a Matrix Market file."); - } - bool pattern; - ss >> item; - if (item == "pattern") { - pattern = true; - } else { - pattern = false; - } - // TODO: do something with real vs. integer vs. pattern? - ss >> item; - bool symmetric; - if (item == "general") { - symmetric = false; - } else if (item == "symmetric") { - symmetric = true; - } else { - throw std::runtime_error(file_path + " has an unsupported matrix type"); - } - - bool outOfComments = false; - while (!outOfComments) { - std::getline(f, buf); - - if (buf[0] != '%') { - outOfComments = true; - } - } - - I m, n, nnz; - // std::istringstream ss(buf); - ss.clear(); - ss.str(buf); - ss >> m >> n >> nnz; - - // NOTE for symmetric matrices: `nnz` holds the number of stored values in - // the matrix market file, while `matrix.nnz_` will hold the total number of - // stored values (including "mirrored" symmetric values). - coo_matrix matrix({m, n}); - if (symmetric) { - matrix.reserve(2 * nnz); - } else { - matrix.reserve(nnz); - } - - size_type c = 0; - while (std::getline(f, buf)) { - I i, j; - T v; - std::istringstream ss(buf); - if (!pattern) { - ss >> i >> j >> v; - } else { - ss >> i >> j; - v = T(1); - } - if (one_indexed) { - i--; - j--; - } - - if (i >= m || j >= n) { - throw std::runtime_error( - "read_MatrixMarket: file has nonzero out of bounds."); - } - - matrix.push_back({{i, j}, v}); - - if (symmetric && i != j) { - matrix.push_back({{j, i}, v}); - } - - c++; - if (c > nnz) { - throw std::runtime_error("read_MatrixMarket: error reading Matrix Market " - "file, file has more nonzeros than reported."); - } - } - - auto sort_fn = [](const auto &a, const auto &b) { - auto &&[a_index, a_value] = a; - auto &&[b_index, b_value] = b; - auto &&[a_i, a_j] = a_index; - auto &&[b_i, b_j] = b_index; - if (a_i < b_i) { - return true; - } else if (a_i == b_i) { - if (a_j < b_j) { - return true; - } - } - return false; - }; - - std::sort(matrix.begin(), matrix.end(), sort_fn); - - f.close(); - - return matrix; -} - -template -void destroy_csr_matrix_view(experimental::dr::shp::csr_matrix_view view, - Allocator &&alloc) { - alloc.deallocate(view.values_data(), view.size()); - typename std::allocator_traits::template rebind_alloc i_alloc( - alloc); - i_alloc.deallocate(view.colind_data(), view.size()); - i_alloc.deallocate(view.rowptr_data(), view.shape()[0] + 1); -} - -} // namespace __detail - -template -auto create_distributed(experimental::dr::shp::csr_matrix_view local_mat, - const matrix_partition &partition) { - experimental::dr::shp::sparse_matrix a(local_mat.shape(), partition); - - std::vector> views; - std::vector events; - views.reserve(a.grid_shape()[0] * a.grid_shape()[1]); - - for (I i = 0; i < a.grid_shape()[0]; i++) { - for (I j = 0; j < a.grid_shape()[1]; j++) { - auto &&tile = a.tile({i, j}); - experimental::dr::index row_bounds(i * a.tile_shape()[0], - i * a.tile_shape()[0] + tile.shape()[0]); - experimental::dr::index column_bounds(j * a.tile_shape()[1], - j * a.tile_shape()[1] + tile.shape()[1]); - - auto local_submat = local_mat.submatrix(row_bounds, column_bounds); - - auto submatrix_shape = experimental::dr::index(row_bounds[1] - row_bounds[0], - column_bounds[1] - column_bounds[0]); - - auto copied_submat = __detail::convert_to_csr( - local_submat, submatrix_shape, rng::distance(local_submat), - std::allocator{}); - - auto e = a.copy_tile_async({i, j}, copied_submat); - - views.push_back(copied_submat); - events.push_back(e); - } - } - __detail::wait(events); - - for (auto &&view : views) { - __detail::destroy_csr_matrix_view(view, std::allocator{}); - } - - return a; -} - -template -auto mmread(std::string file_path, const matrix_partition &partition, - bool one_indexed = true) { - auto m = __detail::mmread(file_path, one_indexed); - auto shape = m.shape(); - auto nnz = m.size(); - - auto local_mat = __detail::convert_to_csr(m, shape, nnz, std::allocator{}); - - auto a = create_distributed(local_mat, partition); - - __detail::destroy_csr_matrix_view(local_mat, std::allocator{}); - - return a; -} - -template -auto mmread(std::string file_path, bool one_indexed = true) { - return mmread( - file_path, - experimental::dr::shp::block_cyclic({experimental::dr::shp::tile::div, experimental::dr::shp::tile::div}, - {experimental::dr::shp::nprocs(), 1}), - one_indexed); -} - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp deleted file mode 100644 index 7d8f1813cd2..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/csr_matrix_view.hpp +++ /dev/null @@ -1,225 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -namespace experimental::dr::shp { - -template -class csr_matrix_view_accessor { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_type = std::iter_value_t; - using scalar_reference = std::iter_reference_t; - - using index_type = I; - - using value_type = experimental::dr::shp::matrix_entry; - - using reference = experimental::dr::shp::matrix_ref; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = csr_matrix_view_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - using key_type = experimental::dr::index; - - constexpr csr_matrix_view_accessor() noexcept = default; - constexpr ~csr_matrix_view_accessor() noexcept = default; - constexpr csr_matrix_view_accessor( - const csr_matrix_view_accessor &) noexcept = default; - constexpr csr_matrix_view_accessor & - operator=(const csr_matrix_view_accessor &) noexcept = default; - - constexpr csr_matrix_view_accessor(TIter values, IIter rowptr, IIter colind, - size_type idx, index_type row, - size_type row_dim) noexcept - : values_(values), rowptr_(rowptr), colind_(colind), idx_(idx), row_(row), - row_dim_(row_dim), idx_offset_(key_type{0, 0}) { - fast_forward_row(); - } - - constexpr csr_matrix_view_accessor(TIter values, IIter rowptr, IIter colind, - size_type idx, index_type row, - size_type row_dim, - key_type idx_offset) noexcept - : values_(values), rowptr_(rowptr), colind_(colind), idx_(idx), row_(row), - row_dim_(row_dim), idx_offset_(idx_offset) { - fast_forward_row(); - } - - // Given that `idx_` has just been advanced to an element - // possibly in a new row, advance `row_` to find the new row. - // That is: - // Advance `row_` until idx_ >= rowptr_[row_] && idx_ < rowptr_[row_+1] - void fast_forward_row() noexcept { - while (row_ < row_dim_ - 1 && idx_ >= rowptr_[row_ + 1]) { - row_++; - } - } - - // Given that `idx_` has just been retreated to an element - // possibly in a previous row, retreat `row_` to find the new row. - // That is: - // Retreat `row_` until idx_ >= rowptr_[row_] && idx_ < rowptr_[row_+1] - void fast_backward_row() noexcept { - while (idx_ < rowptr_[row_]) { - row_--; - } - } - - constexpr csr_matrix_view_accessor & - operator+=(difference_type offset) noexcept { - idx_ += offset; - if (offset < 0) { - fast_backward_row(); - } else { - fast_forward_row(); - } - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return idx_ == other.idx_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(idx_) - difference_type(other.idx_); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - return idx_ < other.idx_; - } - - constexpr reference operator*() const noexcept { - return reference( - key_type(row_ + idx_offset_[0], colind_[idx_] + idx_offset_[1]), - values_[idx_]); - } - -private: - TIter values_; - IIter rowptr_; - IIter colind_; - size_type idx_; - index_type row_; - size_type row_dim_; - key_type idx_offset_; -}; - -template -using csr_matrix_view_iterator = - experimental::dr::iterator_adaptor>; - -template -class csr_matrix_view - : public rng::view_interface> { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_reference = std::iter_reference_t; - using reference = experimental::dr::shp::matrix_ref; - - using scalar_type = T; - using index_type = I; - - using key_type = experimental::dr::index; - using map_type = T; - - using iterator = csr_matrix_view_iterator; - - csr_matrix_view(TIter values, IIter rowptr, IIter colind, key_type shape, - size_type nnz, size_type rank) - : values_(values), rowptr_(rowptr), colind_(colind), shape_(shape), - nnz_(nnz), rank_(rank), idx_offset_(key_type{0, 0}) {} - - csr_matrix_view(TIter values, IIter rowptr, IIter colind, key_type shape, - size_type nnz, size_type rank, key_type idx_offset) - : values_(values), rowptr_(rowptr), colind_(colind), shape_(shape), - nnz_(nnz), rank_(rank), idx_offset_(idx_offset) {} - - key_type shape() const noexcept { return shape_; } - - size_type size() const noexcept { return nnz_; } - - std::size_t rank() const { return rank_; } - - iterator begin() const { - return iterator(values_, rowptr_, colind_, 0, 0, shape()[1], idx_offset_); - } - - iterator end() const { - return iterator(values_, rowptr_, colind_, nnz_, shape()[1], shape()[1], - idx_offset_); - } - - auto row(I row_index) const { - I first = rowptr_[row_index]; - I last = rowptr_[row_index + 1]; - - TIter values = values_; - IIter colind = colind_; - - auto row_elements = rng::views::iota(first, last); - - return row_elements | rng::views::transform([=](auto idx) { - return reference(key_type(row_index, colind[idx]), values[idx]); - }); - } - - auto submatrix(key_type rows, key_type columns) const { - return rng::views::iota(rows[0], rows[1]) | - rng::views::transform([=, *this](auto &&row_index) { - return row(row_index) | rng::views::drop_while([=](auto &&e) { - auto &&[index, v] = e; - return index[1] < columns[0]; - }) | - rng::views::take_while([=](auto &&e) { - auto &&[index, v] = e; - return index[1] < columns[1]; - }) | - rng::views::transform([=](auto &&elem) { - auto &&[index, v] = elem; - auto &&[i, j] = index; - return reference(key_type(i - rows[0], j - columns[0]), - v); - }); - }) | - rng::views::join; - } - - auto values_data() const { return values_; } - - auto rowptr_data() const { return rowptr_; } - - auto colind_data() const { return colind_; } - -private: - TIter values_; - IIter rowptr_; - IIter colind_; - - key_type shape_; - size_type nnz_; - - size_type rank_; - key_type idx_offset_; -}; - -template -csr_matrix_view(TIter, IIter, IIter, Args &&...) - -> csr_matrix_view, std::iter_value_t, - TIter, IIter>; - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp deleted file mode 100644 index 4543af1dd04..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_column_view.hpp +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include - -namespace experimental::dr::shp { -template class dense_matrix_column_accessor { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_value_type = std::iter_value_t; - using scalar_reference = std::iter_reference_t; - - using value_type = experimental::dr::shp::matrix_entry; - - using reference = experimental::dr::shp::matrix_ref; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = dense_matrix_column_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - using key_type = experimental::dr::index<>; - - constexpr dense_matrix_column_accessor() noexcept = default; - constexpr ~dense_matrix_column_accessor() noexcept = default; - constexpr dense_matrix_column_accessor( - const dense_matrix_column_accessor &) noexcept = default; - constexpr dense_matrix_column_accessor & - operator=(const dense_matrix_column_accessor &) noexcept = default; - - constexpr dense_matrix_column_accessor(Iter data, std::size_t i, - std::size_t j, std::size_t ld) noexcept - : data_(data), i_(i), j_(j), ld_(ld) {} - - constexpr dense_matrix_column_accessor & - operator+=(difference_type offset) noexcept { - i_ += offset; - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return i_ == other.i_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(i_) - difference_type(other.i_); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - return i_ < other.i_; - } - - constexpr reference operator*() const noexcept { - return reference(key_type({i_, j_}), data_[i_ * ld_]); - } - -private: - size_type i_, j_; - size_type ld_; - - Iter data_; -}; - -template -using dense_matrix_column_iterator = - experimental::dr::iterator_adaptor>; - -template class dense_matrix_column_view { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_reference = std::iter_reference_t; - - using key_type = experimental::dr::index<>; - using map_type = T; - - using iterator = dense_matrix_column_iterator; - - dense_matrix_column_view(Iter data, size_type column_idx, size_type size, - size_type ld) - : data_(data), column_idx_(column_idx), size_(size), ld_(ld) {} - - scalar_reference operator[](size_type idx) { return data_[idx * ld_]; } - - iterator begin() const { return iterator(data_, 0, column_idx_, ld_); } - - iterator end() const { return iterator(data_, size_, column_idx_, ld_); } - - size_type size() const noexcept { return size_; } - - Iter data_; - size_type column_idx_; - size_type size_; - size_type ld_; -}; - -template -dense_matrix_column_view(Iter, std::size_t, std::size_t, std::size_t) - -> dense_matrix_column_view, Iter>; - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp deleted file mode 100644 index b2674679b20..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_iterator.hpp +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -#include -#include -#include -#include -#include - -namespace experimental::dr::shp { - -template class dense_matrix_accessor { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_type = std::iter_value_t; - using scalar_reference = std::iter_reference_t; - - using value_type = experimental::dr::shp::matrix_entry; - - using reference = experimental::dr::shp::matrix_ref; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = dense_matrix_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - using key_type = experimental::dr::index<>; - - constexpr dense_matrix_accessor() noexcept = default; - constexpr ~dense_matrix_accessor() noexcept = default; - constexpr dense_matrix_accessor(const dense_matrix_accessor &) noexcept = - default; - constexpr dense_matrix_accessor & - operator=(const dense_matrix_accessor &) noexcept = default; - - constexpr dense_matrix_accessor(Iter data, key_type idx, - key_type matrix_shape, size_type ld) noexcept - : data_(data), idx_(idx), matrix_shape_(matrix_shape), ld_(ld), - idx_offset_({0, 0}) {} - - constexpr dense_matrix_accessor(Iter data, key_type idx, key_type idx_offset, - key_type matrix_shape, size_type ld) noexcept - : data_(data), idx_(idx), matrix_shape_(matrix_shape), ld_(ld), - idx_offset_(idx_offset) {} - - constexpr dense_matrix_accessor &operator+=(difference_type offset) noexcept { - size_type new_idx = get_global_idx() + offset; - idx_ = {new_idx / matrix_shape_[1], new_idx % matrix_shape_[1]}; - - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return idx_ == other.idx_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(get_global_idx()) - other.get_global_idx(); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - if (idx_[0] < other.idx_[0]) { - return true; - } else if (idx_[0] == other.idx_[0]) { - return idx_[1] < other.idx_[1]; - } else { - return false; - } - } - - constexpr reference operator*() const noexcept { - return reference( - key_type(idx_[0] + idx_offset_[0], idx_[1] + idx_offset_[1]), - data_[idx_[0] * ld_ + idx_[1]]); - } - - Iter data() const noexcept { return data_; } - -private: - size_type get_global_idx() const noexcept { - return idx_[0] * matrix_shape_[1] + idx_[1]; - } - -private: - Iter data_; - key_type idx_; - key_type matrix_shape_; - size_type ld_; - - key_type idx_offset_; -}; - -template -using dense_matrix_iterator = - experimental::dr::iterator_adaptor>; - -template -using dense_matrix_view_iterator = dense_matrix_iterator; - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp deleted file mode 100644 index 4ad164708b7..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_matrix_view.hpp +++ /dev/null @@ -1,124 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace experimental::dr::shp { - -template -class dense_matrix_view - : public rng::view_interface> { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_reference = std::iter_reference_t; - using reference = experimental::dr::shp::matrix_ref; - - using key_type = experimental::dr::index<>; - using map_type = T; - - using iterator = dense_matrix_view_iterator; - - dense_matrix_view(Iter data, key_type shape, size_type ld, size_type rank) - : data_(data), shape_(shape), idx_offset_(key_type{0, 0}), ld_(ld), - rank_(rank) {} - - dense_matrix_view(Iter data, key_type shape, key_type idx_offset, - size_type ld, size_type rank) - : data_(data), shape_(shape), idx_offset_(idx_offset), ld_(ld), - rank_(rank) {} - - template - requires(std::is_same_v::pointer, - Iter>) - dense_matrix_view(dense_matrix &m) - : data_(m.data()), shape_(m.shape()), idx_offset_(key_type{0, 0}), - ld_(m.ld()), rank_(0) {} - - key_type shape() const noexcept { return shape_; } - - size_type size() const noexcept { return shape()[0] * shape()[1]; } - - scalar_reference operator[](key_type idx) const { - return data_[idx[0] * ld_ + idx[1]]; - } - - iterator begin() const { - return iterator(data_, key_type{0, 0}, idx_offset_, shape_, ld_); - } - - iterator end() const { - return iterator(data_, key_type{shape_[0], 0}, idx_offset_, shape_, ld_); - } - - auto row(size_type row_index) const { - // return dense_matrix_row_view(data_ + row_index * ld_, row_index, - // shape()[1]); - auto row_elements = rng::views::iota(size_type(0), size_type(shape()[1])); - Iter data = data_ + row_index * ld_; - - return row_elements | rng::views::transform([=](auto column_index) { - return reference(key_type(row_index, column_index), - data[column_index]); - }); - } - - auto column(size_type column_index) const { - // return dense_matrix_column_view(data_ + column_index, column_index, - // shape()[0], ld_); - auto column_elements = - rng::views::iota(size_type(0), size_type(shape()[0])); - Iter data = data_ + column_index; - size_type ld = ld_; - - return column_elements | rng::views::transform([=](auto row_index) { - return reference(key_type(row_index, column_index), - data[row_index * ld]); - }); - } - - Iter data() const { return data_; } - - std::size_t rank() const { return rank_; } - - size_type ld() const { return ld_; } - - auto local() const { - auto local_data = __detail::local(data_); - return dense_matrix_view( - local_data, shape_, idx_offset_, ld(), rank()); - } - -private: - Iter data_; - key_type shape_; - key_type idx_offset_; - size_type ld_; - size_type rank_; -}; - -template -dense_matrix_view(Iter, experimental::dr::index<>, std::size_t) - -> dense_matrix_view, Iter>; - -template -dense_matrix_view(Iter, experimental::dr::index<>) - -> dense_matrix_view, Iter>; - -template -dense_matrix_view(dense_matrix &) - -> dense_matrix_view::pointer>; - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp deleted file mode 100644 index d88e0b3682c..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/dense_row_view.hpp +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#pragma once - -#include -#include -#include -#include - -namespace experimental::dr::shp { -template class dense_matrix_row_accessor { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_value_type = std::iter_value_t; - using scalar_reference = std::iter_reference_t; - - using value_type = experimental::dr::shp::matrix_entry; - - using reference = experimental::dr::shp::matrix_ref; - - using iterator_category = std::random_access_iterator_tag; - - using iterator_accessor = dense_matrix_row_accessor; - using const_iterator_accessor = iterator_accessor; - using nonconst_iterator_accessor = iterator_accessor; - - using key_type = experimental::dr::index<>; - - constexpr dense_matrix_row_accessor() noexcept = default; - constexpr ~dense_matrix_row_accessor() noexcept = default; - constexpr dense_matrix_row_accessor( - const dense_matrix_row_accessor &) noexcept = default; - constexpr dense_matrix_row_accessor & - operator=(const dense_matrix_row_accessor &) noexcept = default; - - constexpr dense_matrix_row_accessor(Iter data, std::size_t i, - std::size_t j) noexcept - : data_(data), i_(i), j_(j) {} - - constexpr dense_matrix_row_accessor & - operator+=(difference_type offset) noexcept { - j_ += offset; - return *this; - } - - constexpr bool operator==(const iterator_accessor &other) const noexcept { - return j_ == other.j_; - } - - constexpr difference_type - operator-(const iterator_accessor &other) const noexcept { - return difference_type(j_) - difference_type(other.j_); - } - - constexpr bool operator<(const iterator_accessor &other) const noexcept { - return j_ < other.j_; - } - - constexpr reference operator*() const noexcept { - return reference(key_type({i_, j_}), data_[j_]); - } - -private: - size_type i_, j_; - - Iter data_; -}; - -template -using dense_matrix_row_iterator = - experimental::dr::iterator_adaptor>; - -template class dense_matrix_row_view { -public: - using size_type = std::size_t; - using difference_type = std::ptrdiff_t; - - using scalar_reference = std::iter_reference_t; - - using key_type = experimental::dr::index<>; - using map_type = T; - - using iterator = dense_matrix_row_iterator; - - dense_matrix_row_view(Iter data, size_type row_idx, size_type size) - : data_(data), row_idx_(row_idx), size_(size) {} - - scalar_reference operator[](size_type idx) { return data_[idx]; } - - iterator begin() const { return iterator(data_, row_idx_, 0); } - - iterator end() const { return iterator(data_, row_idx_, size_); } - - size_type size() const noexcept { return size_; } - - Iter data_; - size_type row_idx_; - size_type size_; -}; - -template -dense_matrix_row_view(Iter, std::size_t, std::size_t) - -> dense_matrix_row_view, Iter>; - -} // namespace experimental::dr::shp diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp index eb503a24961..e5f14ae6c38 100644 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp +++ b/include/oneapi/dpl/internal/distributed_ranges_impl/shp/views/standard_views.hpp @@ -17,13 +17,15 @@ namespace views { template auto slice(R &&r, experimental::dr::index<> slice_indices) { - return experimental::dr::shp::distributed_span(experimental::dr::ranges::segments(std::forward(r))) + return experimental::dr::shp::distributed_span( + experimental::dr::ranges::segments(std::forward(r))) .subspan(slice_indices[0], slice_indices[1] - slice_indices[0]); } class slice_adaptor_closure { public: - slice_adaptor_closure(experimental::dr::index<> slice_indices) : idx_(slice_indices) {} + slice_adaptor_closure(experimental::dr::index<> slice_indices) + : idx_(slice_indices) {} template auto operator()(R &&r) const { return slice(std::forward(r), idx_); diff --git a/include/oneapi/dpl/internal/distributed_ranges_impl/source_location/source_location.hpp b/include/oneapi/dpl/internal/distributed_ranges_impl/source_location/source_location.hpp deleted file mode 100644 index a6bd21b9c78..00000000000 --- a/include/oneapi/dpl/internal/distributed_ranges_impl/source_location/source_location.hpp +++ /dev/null @@ -1,65 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#ifndef NOSTD_SOURCE_LOCATION_HPP -#define NOSTD_SOURCE_LOCATION_HPP - -#pragma once - -#include - -namespace nostd { -struct source_location { -public: -#if not defined(__apple_build_version__) and defined(__clang__) and \ - (__clang_major__ >= 9) - static constexpr source_location - current(const char *fileName = __builtin_FILE(), - const char *functionName = __builtin_FUNCTION(), - const uint_least32_t lineNumber = __builtin_LINE(), - const uint_least32_t columnOffset = __builtin_COLUMN()) noexcept -#elif defined(__GNUC__) and \ - (__GNUC__ > 4 or (__GNUC__ == 4 and __GNUC_MINOR__ >= 8)) - static constexpr source_location - current(const char *fileName = __builtin_FILE(), - const char *functionName = __builtin_FUNCTION(), - const uint_least32_t lineNumber = __builtin_LINE(), - const uint_least32_t columnOffset = 0) noexcept -#else - static constexpr source_location - current(const char *fileName = "unsupported", - const char *functionName = "unsupported", - const uint_least32_t lineNumber = 0, - const uint_least32_t columnOffset = 0) noexcept -#endif - { - return source_location(fileName, functionName, lineNumber, columnOffset); - } - - source_location(const source_location &) = default; - source_location(source_location &&) = default; - - constexpr const char *file_name() const noexcept { return fileName; } - - constexpr const char *function_name() const noexcept { return functionName; } - - constexpr uint_least32_t line() const noexcept { return lineNumber; } - - constexpr std::uint_least32_t column() const noexcept { return columnOffset; } - -private: - constexpr source_location(const char *fileName, const char *functionName, - const uint_least32_t lineNumber, - const uint_least32_t columnOffset) noexcept - : fileName(fileName), functionName(functionName), lineNumber(lineNumber), - columnOffset(columnOffset) {} - - const char *fileName; - const char *functionName; - const std::uint_least32_t lineNumber; - const std::uint_least32_t columnOffset; -}; -} // namespace nostd - -#endif diff --git a/test/distributed-ranges/common/distributed_vector.cpp b/test/distributed-ranges/common/distributed_vector.cpp index f3362706843..b78baf7e2fe 100644 --- a/test/distributed-ranges/common/distributed_vector.cpp +++ b/test/distributed-ranges/common/distributed_vector.cpp @@ -12,7 +12,6 @@ template class DistributedVectorAllTypes : public testing::Test { TYPED_TEST_SUITE(DistributedVectorAllTypes, AllTypes); TYPED_TEST(DistributedVectorAllTypes, StaticAsserts) { - DRLOG("Running StaticAsserts test"); TypeParam dv(10); static_assert(rng::random_access_range); static_assert(rng::random_access_range); @@ -27,23 +26,16 @@ TYPED_TEST(DistributedVectorAllTypes, StaticAsserts) { } TYPED_TEST(DistributedVectorAllTypes, getAndPut) { - DRLOG("Running getAndPut test"); TypeParam dv(10); if (comm_rank == 0) { - DRLOG("DV constructed, assign sth on root rank"); dv[5] = 13; - DRLOG("13 assigned on root, now calling fence"); } else { - DRLOG("DV constructed, we are on non-root rank so just call fence"); } fence_on(dv); - DRLOG("barrier called now reading"); for (std::size_t idx = 0; idx < 10; ++idx) { - DRLOG("reading idx:{}", idx); auto val = dv[idx]; - DRLOG("read idx:{} finished, got:{}", idx, val); if (idx == 5) { EXPECT_EQ(val, 13); } else { @@ -53,7 +45,6 @@ TYPED_TEST(DistributedVectorAllTypes, getAndPut) { } TYPED_TEST(DistributedVectorAllTypes, Stream) { - DRLOG("Running Stream test"); Ops1 ops(10); std::ostringstream os; os << ops.dist_vec; @@ -61,7 +52,6 @@ TYPED_TEST(DistributedVectorAllTypes, Stream) { } TYPED_TEST(DistributedVectorAllTypes, Equality) { - DRLOG("Running Equality test"); Ops1 ops(10); iota(ops.dist_vec, 100); rng::iota(ops.vec, 100); @@ -70,7 +60,6 @@ TYPED_TEST(DistributedVectorAllTypes, Equality) { } TYPED_TEST(DistributedVectorAllTypes, Segments) { - DRLOG("Running Segments test"); Ops1 ops(10); EXPECT_TRUE(check_segments(ops.dist_vec)); @@ -79,7 +68,6 @@ TYPED_TEST(DistributedVectorAllTypes, Segments) { } TEST(DistributedVector, ConstructorBasic) { - DRLOG("Running ConstructorBasic test"); xhp::distributed_vector dist_vec(10); iota(dist_vec, 100); @@ -90,7 +78,6 @@ TEST(DistributedVector, ConstructorBasic) { } TEST(DistributedVector, ConstructorFill) { - DRLOG("Running ConstructorFill test"); xhp::distributed_vector dist_vec(10, 1); std::vector local_vec(10, 1); diff --git a/test/distributed-ranges/shp/CMakeLists.txt b/test/distributed-ranges/shp/CMakeLists.txt index 9d32775bf21..063515a87ec 100644 --- a/test/distributed-ranges/shp/CMakeLists.txt +++ b/test/distributed-ranges/shp/CMakeLists.txt @@ -65,7 +65,7 @@ add_executable( ../common/reduce.cpp ../common/sort.cpp ../common/subrange.cpp ../common/take.cpp ../common/transform.cpp ../common/transform_view.cpp ../common/zip.cpp ../common/zip_local.cpp containers.cpp algorithms.cpp - copy.cpp detail.cpp fill.cpp gemv.cpp transform.cpp) + copy.cpp detail.cpp fill.cpp transform.cpp) add_executable(shp-tests-3 shp-tests.cpp containers-3.cpp copy-3.cpp) diff --git a/test/distributed-ranges/shp/gemv.cpp b/test/distributed-ranges/shp/gemv.cpp deleted file mode 100644 index b0d5dce4195..00000000000 --- a/test/distributed-ranges/shp/gemv.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-FileCopyrightText: Intel Corporation -// -// SPDX-License-Identifier: BSD-3-Clause - -#include "xhp-tests.hpp" - -TEST(SparseMatrix, Gemv) { - std::size_t m = 100; - std::size_t k = 100; - - experimental::dr::shp::sparse_matrix a( - {m, k}, 0.1f, - experimental::dr::shp::block_cyclic({experimental::dr::shp::tile::div, experimental::dr::shp::tile::div}, - {experimental::dr::shp::nprocs(), 1})); - - experimental::dr::shp::distributed_vector b(k, 1.f); - experimental::dr::shp::distributed_vector c(m, 0.f); - - experimental::dr::shp::gemv(c, a, b); - - std::vector c_local(m); - - experimental::dr::shp::copy(c.begin(), c.end(), c_local.begin()); - - std::vector c_ref(m, 0.f); - - for (auto &&[index, v] : a) { - auto &&[i, k] = index; - - c_ref[i] += v; - } - - EXPECT_TRUE(fp_equal(c_ref, c_local)) - << fmt::format("Reference:\n {}\nActual:\n {}\n", c_ref, c_local); -} diff --git a/test/distributed-ranges/shp/xhp-tests.hpp b/test/distributed-ranges/shp/xhp-tests.hpp index 10bdb454831..5b02a6b2de6 100644 --- a/test/distributed-ranges/shp/xhp-tests.hpp +++ b/test/distributed-ranges/shp/xhp-tests.hpp @@ -8,7 +8,7 @@ #include #include #include -#include +// #include #define TEST_SHP @@ -23,7 +23,8 @@ inline void barrier() {} inline void fence() {} inline void fence_on(auto &&) {} -using AllocatorTypes = ::testing::Types>; +using AllocatorTypes = + ::testing::Types>; template concept compliant_view = rng::forward_range && requires(V &v) {