Skip to content

Commit

Permalink
Merge pull request #40580 from fwyzard/add_alpaka_kernel_tests
Browse files Browse the repository at this point in the history
Implement a simple vector add test with alpaka
  • Loading branch information
cmsbuild authored Jan 23, 2023
2 parents 7ec4cc6 + 60f46a9 commit 976aa1f
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 2 deletions.
4 changes: 2 additions & 2 deletions HeterogeneousCore/AlpakaInterface/interface/workdivision.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ namespace cms::alpakatools {
return *this;

// increment the thread index with the grid stride
first_ += stride_ * elements_;
first_ += stride_;
index_ = first_;
last_ = std::min(first_ + elements_, extent_);
if (index_ < extent_)
Expand Down Expand Up @@ -204,7 +204,7 @@ namespace cms::alpakatools {
return *this;

// increment the thread index along with the last dimension with the grid stride
first_[last_dimension] += stride_[last_dimension] * elements_[last_dimension];
first_[last_dimension] += stride_[last_dimension];
index_[last_dimension] = first_[last_dimension];
last_ = std::min(first_[last_dimension] + elements_[last_dimension], extent_[last_dimension]);
if (index_[last_dimension] < extent_[last_dimension])
Expand Down
7 changes: 7 additions & 0 deletions HeterogeneousCore/AlpakaInterface/test/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,10 @@
<use name="HeterogeneousCore/AlpakaInterface"/>
<flags ALPAKA_BACKENDS="1"/>
</bin>

<!-- Test binary alpakaTestKernel, built from alpaka/testKernel.dev.cc; it uses alpaka, catch2 and HeterogeneousCore/AlpakaInterface. -->
<!-- NOTE(review): ALPAKA_BACKENDS="1" presumably asks the build system to build this binary for the Alpaka backends; confirm against the build rules. -->
<bin name="alpakaTestKernel" file="alpaka/testKernel.dev.cc">
<use name="alpaka"/>
<use name="catch2"/>
<use name="HeterogeneousCore/AlpakaInterface"/>
<flags ALPAKA_BACKENDS="1"/>
</bin>
127 changes: 127 additions & 0 deletions HeterogeneousCore/AlpakaInterface/test/alpaka/testKernel.dev.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#include <cstdio>
#include <iostream>
#include <random>

#include <alpaka/alpaka.hpp>

#define CATCH_CONFIG_MAIN
#include <catch.hpp>

#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
#include "HeterogeneousCore/AlpakaInterface/interface/vec.h"
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

// Every test binary is built for exactly one Alpaka backend, so the
// backend-specific namespace is imported as a whole.
using namespace ALPAKA_ACCELERATOR_NAMESPACE;

// Tag passed to TEST_CASE below: the string produced by
// ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel), wrapped in square brackets.
static constexpr char const* s_tag = "[" ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel) "]";

// Element-wise vector addition kernel taking the problem size as a scalar.
//
// For every index produced by cms::alpakatools::elements_with_stride(acc, size)
// the kernel stores in1[index] + in2[index] into out[index].
//
//   acc   accelerator object handed to the kernel at launch
//   in1   first input array
//   in2   second input array
//   out   output array; declared __restrict__, like the inputs
//   size  extent passed to elements_with_stride
struct VectorAddKernel {
  template <typename TAcc, typename T>
  ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                T const* __restrict__ in1,
                                T const* __restrict__ in2,
                                T* __restrict__ out,
                                size_t size) const {
    for (auto i : cms::alpakatools::elements_with_stride(acc, size)) {
      out[i] = in1[i] + in2[i];
    }
  }
};

// Element-wise vector addition kernel taking the problem size as a Vec1D.
//
// Iterates over the N-dimensional indices produced by
// cms::alpakatools::elements_with_stride_nd(acc, size) and uses component 0
// of each of them as the linear position in the arrays.
//
//   acc   accelerator object handed to the kernel at launch
//   in1   first input array
//   in2   second input array
//   out   output array; declared __restrict__, like the inputs
//   size  extent passed to elements_with_stride_nd
struct VectorAddKernel1D {
  template <typename TAcc, typename T>
  ALPAKA_FN_ACC void operator()(TAcc const& acc,
                                T const* __restrict__ in1,
                                T const* __restrict__ in2,
                                T* __restrict__ out,
                                Vec1D size) const {
    for (auto idx : cms::alpakatools::elements_with_stride_nd(acc, size)) {
      // only one dimension: its single component is the element position
      auto i = idx[0];
      out[i] = in1[i] + in2[i];
    }
  }
};

// Runs VectorAddKernel and VectorAddKernel1D on every device returned by
// cms::alpakatools::devices<Platform>() and compares the output copied back
// from the device with the sums computed on the host. If the platform reports
// no devices, a message is printed and the test returns without any check.
TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel), s_tag) {
  SECTION("VectorAddKernel") {
    // get the list of devices on the current platform
    auto const& devices = cms::alpakatools::devices<Platform>();
    if (devices.empty()) {
      std::cout << "No devices available on the platform " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE)
                << ", the test will be skipped.\n";
      return;
    }

    // random number generator with a gaussian distribution
    std::random_device rd{};
    std::default_random_engine rand{rd()};
    std::normal_distribution<float> dist{0., 1.};

    // tolerance
    constexpr float epsilon = 0.000001;

    // buffer size
    constexpr size_t size = 1024 * 1024;

    // allocate input and output host buffers
    auto in1_h = cms::alpakatools::make_host_buffer<float[]>(size);
    auto in2_h = cms::alpakatools::make_host_buffer<float[]>(size);
    auto out_h = cms::alpakatools::make_host_buffer<float[]>(size);

    // fill the input buffers with random data, and the output buffer with zeros
    for (size_t i = 0; i < size; ++i) {
      in1_h[i] = dist(rand);
      in2_h[i] = dist(rand);
      out_h[i] = 0.;
    }

    // check that every element of the host output buffer is within epsilon of
    // the sum of the two host inputs; shared by both kernel launches below
    auto const check_results = [&]() {
      for (size_t i = 0; i < size; ++i) {
        float sum = in1_h[i] + in2_h[i];
        REQUIRE(out_h[i] < sum + epsilon);
        REQUIRE(out_h[i] > sum - epsilon);
      }
    };

    // run the test on each device
    for (auto const& device : devices) {
      std::cout << "Test 1D vector addition on " << alpaka::getName(device) << '\n';
      auto queue = Queue(device);

      // allocate input and output buffers on the device
      auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
      auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
      auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);

      // copy the input data to the device; the size is known from the buffer objects
      alpaka::memcpy(queue, in1_d, in1_h);
      alpaka::memcpy(queue, in2_d, in2_h);

      // fill the output buffer with zeros; the size is known from the buffer objects.
      // alpaka::memset takes a byte value, not a float: all-zero bytes are 0.f
      alpaka::memset(queue, out_d, 0x00);

      // launch the 1-dimensional kernel with scalar size
      auto div = cms::alpakatools::make_workdiv<Acc1D>(4, 4);
      alpaka::exec<Acc1D>(queue, div, VectorAddKernel{}, in1_d.data(), in2_d.data(), out_d.data(), size);

      // copy the results from the device to the host
      alpaka::memcpy(queue, out_h, out_d);

      // wait for all the operations to complete
      alpaka::wait(queue);

      // check the results
      check_results();

      // reset the output buffer on the device to all zeros, so that results
      // left over from the first kernel cannot satisfy the second check
      alpaka::memset(queue, out_d, 0x00);

      // launch the 1-dimensional kernel with vector size
      alpaka::exec<Acc1D>(queue, div, VectorAddKernel1D{}, in1_d.data(), in2_d.data(), out_d.data(), size);

      // copy the results from the device to the host
      alpaka::memcpy(queue, out_h, out_d);

      // wait for all the operations to complete
      alpaka::wait(queue);

      // check the results
      check_results();
    }
  }
}

0 comments on commit 976aa1f

Please sign in to comment.