Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a simple vector add test with alpaka #40580

Merged
merged 2 commits into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions HeterogeneousCore/AlpakaInterface/interface/workdivision.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ namespace cms::alpakatools {
return *this;

// increment the thread index with the grid stride
first_ += stride_ * elements_;
first_ += stride_;
index_ = first_;
last_ = std::min(first_ + elements_, extent_);
if (index_ < extent_)
Expand Down Expand Up @@ -204,7 +204,7 @@ namespace cms::alpakatools {
return *this;

// increment the thread index along with the last dimension with the grid stride
first_[last_dimension] += stride_[last_dimension] * elements_[last_dimension];
first_[last_dimension] += stride_[last_dimension];
index_[last_dimension] = first_[last_dimension];
last_ = std::min(first_[last_dimension] + elements_[last_dimension], extent_[last_dimension]);
if (index_[last_dimension] < extent_[last_dimension])
Expand Down
7 changes: 7 additions & 0 deletions HeterogeneousCore/AlpakaInterface/test/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,10 @@
<use name="HeterogeneousCore/AlpakaInterface"/>
<flags ALPAKA_BACKENDS="1"/>
</bin>

<bin name="alpakaTestKernel" file="alpaka/testKernel.dev.cc">
<use name="alpaka"/>
<use name="catch2"/>
<use name="HeterogeneousCore/AlpakaInterface"/>
<flags ALPAKA_BACKENDS="1"/>
</bin>
127 changes: 127 additions & 0 deletions HeterogeneousCore/AlpakaInterface/test/alpaka/testKernel.dev.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
#include <cstdio>
#include <random>

#include <alpaka/alpaka.hpp>

#define CATCH_CONFIG_MAIN
#include <catch.hpp>

#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
#include "HeterogeneousCore/AlpakaInterface/interface/memory.h"
#include "HeterogeneousCore/AlpakaInterface/interface/vec.h"
#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h"

// each test binary is built for a single Alpaka backend
using namespace ALPAKA_ACCELERATOR_NAMESPACE;

static constexpr auto s_tag = "[" ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel) "]";

struct VectorAddKernel {
template <typename TAcc, typename T>
ALPAKA_FN_ACC void operator()(
TAcc const& acc, T const* __restrict__ in1, T const* __restrict__ in2, T* __restrict__ out, size_t size) const {
for (auto index : cms::alpakatools::elements_with_stride(acc, size)) {
out[index] = in1[index] + in2[index];
}
}
};

struct VectorAddKernel1D {
template <typename TAcc, typename T>
ALPAKA_FN_ACC void operator()(
TAcc const& acc, T const* __restrict__ in1, T const* __restrict__ in2, T* __restrict__ out, Vec1D size) const {
for (auto ndindex : cms::alpakatools::elements_with_stride_nd(acc, size)) {
auto index = ndindex[0];
out[index] = in1[index] + in2[index];
}
}
};

TEST_CASE("Standard checks of " ALPAKA_TYPE_ALIAS_NAME(alpakaTestKernel), s_tag) {
SECTION("VectorAddKernel") {
// get the list of devices on the current platform
auto const& devices = cms::alpakatools::devices<Platform>();
if (devices.empty()) {
std::cout << "No devices available on the platform " << EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE)
<< ", the test will be skipped.\n";
return;
}

// random number generator with a gaussian distribution
std::random_device rd{};
std::default_random_engine rand{rd()};
std::normal_distribution<float> dist{0., 1.};

// tolerance
constexpr float epsilon = 0.000001;

// buffer size
constexpr size_t size = 1024 * 1024;

// allocate input and output host buffers
auto in1_h = cms::alpakatools::make_host_buffer<float[]>(size);
auto in2_h = cms::alpakatools::make_host_buffer<float[]>(size);
auto out_h = cms::alpakatools::make_host_buffer<float[]>(size);

// fill the input buffers with random data, and the output buffer with zeros
for (size_t i = 0; i < size; ++i) {
in1_h[i] = dist(rand);
in2_h[i] = dist(rand);
out_h[i] = 0.;
}

// run the test on each device
for (auto const& device : devices) {
std::cout << "Test 1D vector addition on " << alpaka::getName(device) << '\n';
auto queue = Queue(device);

// allocate input and output buffers on the device
auto in1_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
auto in2_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);
auto out_d = cms::alpakatools::make_device_buffer<float[]>(queue, size);

// copy the input data to the device; the size is known from the buffer objects
alpaka::memcpy(queue, in1_d, in1_h);
alpaka::memcpy(queue, in2_d, in2_h);

// fill the output buffer with zeros; the size is known from the buffer objects
alpaka::memset(queue, out_d, 0.);

// launch the 1-dimensional kernel with scalar size
auto div = cms::alpakatools::make_workdiv<Acc1D>(4, 4);
alpaka::exec<Acc1D>(queue, div, VectorAddKernel{}, in1_d.data(), in2_d.data(), out_d.data(), size);

// copy the results from the device to the host
alpaka::memcpy(queue, out_h, out_d);

// wait for all the operations to complete
alpaka::wait(queue);

// check the results
for (size_t i = 0; i < size; ++i) {
float sum = in1_h[i] + in2_h[i];
REQUIRE(out_h[i] < sum + epsilon);
REQUIRE(out_h[i] > sum - epsilon);
}

// reset the output buffer on the device to all zeros
alpaka::memset(queue, out_d, 0.);

// launch the 1-dimensional kernel with vector size
alpaka::exec<Acc1D>(queue, div, VectorAddKernel1D{}, in1_d.data(), in2_d.data(), out_d.data(), size);

// copy the results from the device to the host
alpaka::memcpy(queue, out_h, out_d);

// wait for all the operations to complete
alpaka::wait(queue);

// check the results
for (size_t i = 0; i < size; ++i) {
float sum = in1_h[i] + in2_h[i];
REQUIRE(out_h[i] < sum + epsilon);
REQUIRE(out_h[i] > sum - epsilon);
}
}
}
}