Skip to content

Commit

Permalink
Merge pull request #6608 from STEllAR-GROUP/for_loop_simd
Browse files Browse the repository at this point in the history
Partially support data parallel for_loop
  • Loading branch information
hkaiser authored Feb 13, 2025
2 parents b3d750f + 57d3fac commit 62d92ad
Show file tree
Hide file tree
Showing 11 changed files with 297 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#if defined(HPX_HAVE_DATAPAR)
#include <hpx/assert.hpp>
#include <hpx/execution/traits/vector_pack_alignment_size.hpp>
#include <hpx/execution/traits/vector_pack_get_set.hpp>
#include <hpx/execution/traits/vector_pack_load_store.hpp>
#include <hpx/execution/traits/vector_pack_type.hpp>
#include <hpx/functional/detail/invoke.hpp>
Expand Down Expand Up @@ -133,6 +134,31 @@ namespace hpx::parallel::util::detail {
}
};

// Loop-step helper for plain integral loop variables: the index values
// themselves (rather than values loaded through an iterator) are packed and
// handed to the callable.
template <typename I>
struct datapar_loop_step<I, std::enable_if_t<std::is_integral_v<I>>>
{
    using V1 = traits::vector_pack_type_t<I, 1>;
    using V = traits::vector_pack_type_t<I>;

    // Invoke f with a V1 pack constructed from the current index, then
    // advance the index by one.
    template <typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr void call1(F&& f, I& i)
    {
        V1 single(i);
        HPX_INVOKE(f, single);
        ++i;
    }

    // Invoke f with a V pack whose lanes hold i, i + 1, i + 2, ..., then
    // advance the index by the width of the pack.
    template <typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr void callv(F&& f, I& i)
    {
        V pack;

        std::size_t lane = 0;
        while (lane != traits::size(pack))
        {
            traits::set(pack, lane, static_cast<I>(i + lane));
            ++lane;
        }

        HPX_INVOKE(f, pack);
        i += traits::vector_pack_size_v<V>;
    }
};

///////////////////////////////////////////////////////////////////////////
template <typename Iter, typename Enable = void>
struct datapar_loop_pred_step
Expand Down
77 changes: 67 additions & 10 deletions libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2007-2023 Hartmut Kaiser
// Copyright (c) 2007-2025 Hartmut Kaiser
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
Expand All @@ -15,6 +15,7 @@
#include <hpx/execution/traits/vector_pack_load_store.hpp>
#include <hpx/execution/traits/vector_pack_type.hpp>
#include <hpx/executors/datapar/execution_policy.hpp>
#include <hpx/iterator_support/traits/is_iterator.hpp>
#include <hpx/parallel/datapar/iterator_helpers.hpp>
#include <hpx/parallel/util/loop.hpp>

Expand Down Expand Up @@ -238,8 +239,12 @@ namespace hpx::parallel::util {
};

///////////////////////////////////////////////////////////////////////
// Primary template, declared only. The Enable parameter exists so that the
// specializations in this file can be selected via std::enable_if_t (one for
// iterator types, one for integral loop variables).
template <typename Iterator, typename Enable = void>
struct datapar_loop_n;

template <typename Iterator>
struct datapar_loop_n
struct datapar_loop_n<Iterator,
std::enable_if_t<hpx::traits::is_iterator_v<Iterator>>>
{
using iterator_type = std::decay_t<Iterator>;
using value_type =
Expand All @@ -258,8 +263,9 @@ namespace hpx::parallel::util {
{
std::size_t len = count;

// clang-format off
for (/* */; !detail::is_data_aligned(first) && len != 0;
--len)
--len)
{
datapar_loop_step<InIter>::call1(f, first);
}
Expand All @@ -268,16 +274,18 @@ namespace hpx::parallel::util {

for (auto len_v =
static_cast<std::int64_t>(len - (size + 1));
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
{
datapar_loop_step<InIter>::callv(f, first);
}
// clang-format on

for (/* */; len != 0; --len)
{
datapar_loop_step<InIter>::call1(f, first);
}

return first;
}
else
Expand All @@ -302,6 +310,51 @@ namespace hpx::parallel::util {
}
};

// Specialization of datapar_loop_n for loops over a plain integral index
// (as opposed to an iterator). The count index values starting at first are
// processed in three phases: a scalar prologue, a vectorized main loop that
// hands whole packs of consecutive indices to f, and a scalar epilogue.
template <typename I>
struct datapar_loop_n<I, std::enable_if_t<std::is_integral_v<I>>>
{
    using V = traits::vector_pack_type_t<I>;

    // Invoke f for count consecutive index values starting at first.
    //
    // first  the first index value to process
    // count  the number of index values to process
    // f      callable invoked with a pack of index values
    //
    // Returns first after it has been advanced by the individual loop steps.
    template <typename Iter, typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter call(
        Iter first, std::size_t count, F&& f)
    {
        std::size_t len = count;
        constexpr std::size_t size = traits::vector_pack_size_v<V>;

        // Scalar prologue: peel single elements until first is a multiple of
        // the pack size. The number of steps needed for that is
        // (size - first % size) % size; peeling 'first % size' elements
        // would (in general) leave first at a non-multiple of size.
        std::size_t const misalign = static_cast<std::size_t>(first) % size;
        for (std::size_t i = (size - misalign) % size; i != 0 && len != 0;
             --i, --len)
        {
            datapar_loop_step<Iter>::call1(f, first);
        }

        // Vectorized main loop, entered only while more than size + 1
        // elements remain. Both operands are converted to a signed 64 bit
        // value before subtracting: performing the subtraction on
        // std::size_t and casting afterwards yields a huge positive number
        // instead of a negative one if std::size_t is narrower than 64 bits.
        // clang-format off
        for (auto len_v = static_cast<std::int64_t>(len) -
                 static_cast<std::int64_t>(size + 1);
             len_v > 0;
             len_v -= static_cast<std::int64_t>(size), len -= size)
        {
            datapar_loop_step<Iter>::callv(f, first);
        }
        // clang-format on

        // Scalar epilogue: process whatever is left one element at a time.
        for (/* */; len != 0; --len)
        {
            datapar_loop_step<Iter>::call1(f, first);
        }
        return first;
    }

    // Same as above, but returns first unchanged (without invoking f) if
    // the given token reports cancellation. The token is queried only once.
    template <typename Iter, typename CancelToken, typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter call(
        Iter first, std::size_t count, CancelToken& tok, F&& f)
    {
        // check at the start of a partition only
        if (tok.was_cancelled())
            return first;

        return call(first, count, HPX_FORWARD(F, f));
    }
};

///////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct datapar_loop_n_ind
Expand All @@ -323,8 +376,9 @@ namespace hpx::parallel::util {
{
std::size_t len = count;

// clang-format off
for (/* */; !detail::is_data_aligned(first) && len != 0;
--len)
--len)
{
datapar_loop_step_ind<InIter>::call1(f, first);
}
Expand All @@ -333,11 +387,12 @@ namespace hpx::parallel::util {

for (auto len_v =
static_cast<std::int64_t>(len - (size + 1));
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
{
datapar_loop_step_ind<InIter>::callv(f, first);
}
// clang-format on

for (/* */; len != 0; --len)
{
Expand Down Expand Up @@ -381,14 +436,16 @@ namespace hpx::parallel::util {

constexpr std::size_t size = traits::vector_pack_size_v<V>;

// clang-format off
for (auto len_v = static_cast<std::int64_t>(len - (size + 1));
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
{
datapar_loop_idx_step<Iter>::callv(f, it, base_idx);
std::advance(it, size);
base_idx += size;
}
// clang-format on

for (/* */; len != 0; --len)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ if(HPX_WITH_DATAPAR)
foreach_datapar
foreach_datapar_zipiter
foreachn_datapar
for_loop_datapar
generate_datapar
generaten_datapar
mismatch_binary_datapar
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright (c) 2016-2025 Hartmut Kaiser
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <hpx/algorithm.hpp>
#include <hpx/datapar.hpp>
#include <hpx/init.hpp>
#include <hpx/modules/testing.hpp>

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <numeric>
#include <random>
#include <string>
#include <utility>
#include <vector>

///////////////////////////////////////////////////////////////////////////////
// Seed and generator shared by all tests in this file. The seed defaults to
// a value drawn from std::random_device; hpx_main replaces it (and re-seeds
// the generator) if the "seed" command line option is given.
unsigned int seed = std::random_device{}();
std::mt19937 gen(seed);

///////////////////////////////////////////////////////////////////////////////
// Run an index based for_loop with the given (synchronous) data-parallel
// policy over [0, c.size()) and check that every element was overwritten.
template <typename ExPolicy>
void test_for_loop_idx(ExPolicy&& policy)
{
    static_assert(hpx::is_execution_policy_v<ExPolicy>,
        "hpx::is_execution_policy_v<ExPolicy>");

    // start from consecutive values beginning at a random number
    std::vector<std::size_t> c(10007);
    std::iota(c.begin(), c.end(), gen());

    // the loop body receives a pack of indices; touch every lane of it
    hpx::experimental::for_loop(std::forward<ExPolicy>(policy), 0,
        static_cast<int>(c.size()), [&c](auto pack) {
            std::size_t const lanes = hpx::parallel::traits::size(pack);
            for (std::size_t lane = 0; lane < lanes; ++lane)
            {
                c[hpx::parallel::traits::get(pack, lane)] = 42;
            }
        });

    // verify values
    std::size_t count = 0;
    for (std::size_t v : c)
    {
        HPX_TEST_EQ(v, std::size_t(42));
        ++count;
    }
    HPX_TEST_EQ(count, c.size());
}

// Same check as the synchronous variant, but for task policies: the loop
// returns a future-like object that is waited on before verifying.
template <typename ExPolicy>
void test_for_loop_idx_async(ExPolicy&& p)
{
    // start from consecutive values beginning at a random number
    std::vector<std::size_t> c(10007);
    std::iota(c.begin(), c.end(), gen());

    // the loop body receives a pack of indices; touch every lane of it
    auto pending = hpx::experimental::for_loop(std::forward<ExPolicy>(p), 0,
        static_cast<int>(c.size()), [&c](auto pack) {
            std::size_t const lanes = hpx::parallel::traits::size(pack);
            for (std::size_t lane = 0; lane < lanes; ++lane)
            {
                c[hpx::parallel::traits::get(pack, lane)] = 42;
            }
        });
    pending.wait();

    // verify values
    std::size_t count = 0;
    for (std::size_t v : c)
    {
        HPX_TEST_EQ(v, std::size_t(42));
        ++count;
    }
    HPX_TEST_EQ(count, c.size());
}

void for_loop_test_idx()
{
using namespace hpx::execution;

test_for_loop_idx(simd);
test_for_loop_idx(par_simd);

test_for_loop_idx_async(simd(task));
test_for_loop_idx_async(par_simd(task));
}

///////////////////////////////////////////////////////////////////////////////
// HPX entry point: pick up an optional seed from the command line, report
// the seed in use, run the tests and shut down the runtime.
int hpx_main(hpx::program_options::variables_map& vm)
{
    // a seed given on the command line overrides the random default
    if (vm.count("seed") != 0)
    {
        seed = vm["seed"].as<unsigned int>();
    }

    // always print the seed so that a failing run can be reproduced
    std::cout << "using seed: " << seed << std::endl;
    gen.seed(seed);

    for_loop_test_idx();

    return hpx::local::finalize();
}

int main(int argc, char* argv[])
{
// add command line option which controls the random number generator seed
using namespace hpx::program_options;
options_description desc_commandline(
"Usage: " HPX_APPLICATION_STRING " [options]");

desc_commandline.add_options()("seed,s", value<unsigned int>(),
"the random number generator seed to use for this run");

// By default this test should run on all available cores
std::vector<std::string> const cfg = {"hpx.os_threads=all"};

// Initialize and run HPX
hpx::local::init_params init_args;
init_args.desc_cmdline = desc_commandline;
init_args.cfg = cfg;

HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0,
"HPX main exited with non-zero status");

return hpx::util::report_errors();
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,50 @@
#include <hpx/config.hpp>

#if defined(HPX_HAVE_DATAPAR_EVE)

#include <hpx/assert.hpp>
#include <hpx/concepts/concepts.hpp>
#include <hpx/execution/traits/detail/simd/vector_pack_simd.hpp>
#include <hpx/execution/traits/vector_pack_alignment_size.hpp>

#include <cstddef>

namespace hpx::parallel::traits {

///////////////////////////////////////////////////////////////////////
// Read one element of a vector pack.
//
// vec    the pack to read from
// index  position of the element, forwarded unchanged to vec.get()
//
// Returns whatever vec.get(index) returns (by value, as the return type is
// deduced with plain auto).
// NOTE(review): HPX_CONCEPT_REQUIRES_ presumably restricts this overload to
// types for which is_vector_pack_v holds -- confirm against its definition.
template <typename Vector>
template <typename Vector, HPX_CONCEPT_REQUIRES_(is_vector_pack_v<Vector>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto get(
Vector& vec, std::size_t index) noexcept
{
return vec.get(index);
}

// Read the value of a scalar "pack".
//
// sc     the scalar to read
// index  must be 0 (checked with HPX_ASSERT); otherwise unused
//
// Returns a copy of sc (the return type is deduced with plain auto).
// NOTE(review): HPX_CONCEPT_REQUIRES_ presumably restricts this overload to
// types for which is_scalar_vector_pack_v holds -- confirm.
template <typename Scalar,
HPX_CONCEPT_REQUIRES_(is_scalar_vector_pack_v<Scalar>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto get(
Scalar& sc, [[maybe_unused]] std::size_t index) noexcept
{
HPX_ASSERT(index == 0);
return sc;
}

///////////////////////////////////////////////////////////////////////
// Write one element of a vector pack.
//
// vec    the pack to modify
// index  position of the element, forwarded unchanged to vec.set()
// val    the value to store, forwarded to vec.set()
//
// Returns nothing (the deduced return type is void).
// NOTE(review): HPX_CONCEPT_REQUIRES_ presumably restricts this overload to
// types for which is_vector_pack_v holds -- confirm against its definition.
template <typename Vector, typename T>
template <typename Vector, typename T,
HPX_CONCEPT_REQUIRES_(is_vector_pack_v<Vector>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto set(
Vector& vec, std::size_t index, T val) noexcept
{
vec.set(index, val);
}

// Write the value of a scalar "pack".
//
// sc     the scalar to overwrite
// index  must be 0 (checked with HPX_ASSERT); otherwise unused
// val    the value assigned to sc
//
// Returns nothing (the deduced return type is void).
// NOTE(review): HPX_CONCEPT_REQUIRES_ presumably restricts this overload to
// types for which is_scalar_vector_pack_v holds -- confirm.
template <typename Scalar, typename T,
HPX_CONCEPT_REQUIRES_(is_scalar_vector_pack_v<Scalar>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto set(
Scalar& sc, [[maybe_unused]] std::size_t index, T val) noexcept
{
HPX_ASSERT(index == 0);
sc = val;
}
} // namespace hpx::parallel::traits

#endif
Loading

0 comments on commit 62d92ad

Please sign in to comment.