Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Add tests for SYCL 2020 function objects #310

Merged
merged 18 commits into from
Jul 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 180 additions & 0 deletions SYCL/GroupAlgorithm/exclusive_scan_sycl2020.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
// UNSUPPORTED: cuda
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -I . -o %t13.out

#include "support.h"
#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
#include <limits>
#include <numeric>
#include <vector>
using namespace sycl;

template <class SpecializationKernelName, int TestNumber>
class exclusive_scan_kernel;

// std::exclusive_scan isn't implemented yet, so use serial implementation
// instead
Comment on lines +21 to +22

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we compile everything with C++17, can we replace this with std::exclusive_scan?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed using of emu::exclusive scan to std::exclusive_scan

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that Jenkins pre-ci linux/cuda compiles with c++14. Tests are failing with message "no member named 'exclusive_scan' in namespace 'std'"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@maximdimakov, -fsycl triggers -std=c++17, so it should be compiled with c++17. Looks strange, does it reproduce locally?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can not reproduce this problem locally. The errors occurs when I explicitly specify -std=c++14. Without -std flag or with -std=c++17 the test passed

Copy link

@dm-vodopyanov dm-vodopyanov Jun 9, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like the environment problem of CI machine, could be outdated libstdc++ which doesn't contain implementation for std::inclusive_scan and std::exclusive_scan. @tfzhu, can you please take a look on this issue and try to resolve it?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also tagging @bader to be aware about this CI problem.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was the problem resolved? If not, could you please leave a TODO here about using std::exclusive_scan later?

// TODO: use std::exclusive_scan when it will be supported
namespace emu {
template <typename InputIterator, typename OutputIterator,
class BinaryOperation, typename T>
OutputIterator exclusive_scan(InputIterator first, InputIterator last,
OutputIterator result, T init,
BinaryOperation binary_op) {
T partial = init;
for (InputIterator it = first; it != last; ++it) {
*(result++) = partial;
partial = binary_op(partial, *it);
}
return result;
}
} // namespace emu

template <typename SpecializationKernelName, typename InputContainer,
typename OutputContainer, class BinaryOperation>
void test(queue q, InputContainer input, OutputContainer output,
BinaryOperation binary_op,
typename OutputContainer::value_type identity) {
typedef typename InputContainer::value_type InputT;
typedef typename OutputContainer::value_type OutputT;
typedef class exclusive_scan_kernel<SpecializationKernelName, 0> kernel_name0;
constexpr size_t G = 64;
constexpr size_t N = input.size();
std::vector<OutputT> expected(N);

// checking
// template <typename Group, typename T, typename BinaryOperation>
// T exclusive_scan_over_group(Group g, T x, BinaryOperation binary_op)
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name0>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
int lid = it.get_local_id(0);
out[lid] = exclusive_scan_over_group(g, in[lid], binary_op);
});
});
}
emu::exclusive_scan(input.begin(), input.begin() + G, expected.begin(),
identity, binary_op);
assert(std::equal(output.begin(), output.begin() + G, expected.begin()));

typedef class exclusive_scan_kernel<SpecializationKernelName, 1> kernel_name1;
constexpr OutputT init = 42;

// checking
// template <typename Group, typename V, typename T, class BinaryOperation>
// T exclusive_scan_over_group(Group g, V x, T init, BinaryOperation
// binary_op)
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name1>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
int lid = it.get_local_id(0);
out[lid] = exclusive_scan_over_group(g, in[lid], init, binary_op);
});
});
}
emu::exclusive_scan(input.begin(), input.begin() + G, expected.begin(), init,
binary_op);
assert(std::equal(output.begin(), output.begin() + G, expected.begin()));

typedef class exclusive_scan_kernel<SpecializationKernelName, 2> kernel_name2;

// checking
// template <typename Group, typename InPtr, typename OutPtr,
// class BinaryOperation>
// OutPtr joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr
// result,
// BinaryOperation binary_op)
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name2>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
joint_exclusive_scan(g, in.get_pointer(), in.get_pointer() + N,
out.get_pointer(), binary_op);
});
});
}
emu::exclusive_scan(input.begin(), input.begin() + N, expected.begin(),
identity, binary_op);
assert(std::equal(output.begin(), output.begin() + N, expected.begin()));

typedef class exclusive_scan_kernel<SpecializationKernelName, 3> kernel_name3;

// checking
// template <typename Group, typename InPtr, typename OutPtr, typename T,
// class BinaryOperation>
// OutPtr joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr
// result, T init,
// BinaryOperation binary_op)
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name3>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
joint_exclusive_scan(g, in.get_pointer(), in.get_pointer() + N,
out.get_pointer(), init, binary_op);
});
});
}
emu::exclusive_scan(input.begin(), input.begin() + N, expected.begin(), init,
binary_op);
assert(std::equal(output.begin(), output.begin() + N, expected.begin()));
}

int main() {
queue q;
if (!isSupportedDevice(q.get_device())) {
std::cout << "Skipping test\n";
return 0;
}

constexpr int N = 128;
std::array<int, N> input;
std::array<int, N> output;
std::iota(input.begin(), input.end(), 0);
std::fill(output.begin(), output.end(), 0);

test<class KernelNamePlusV>(q, input, output, sycl::plus<>(), 0);
test<class KernelNameMinimumV>(q, input, output, sycl::minimum<>(),
std::numeric_limits<int>::max());
test<class KernelNameMaximumV>(q, input, output, sycl::maximum<>(),
std::numeric_limits<int>::lowest());

test<class KernelNamePlusI>(q, input, output, sycl::plus<int>(), 0);
test<class KernelNameMinimumI>(q, input, output, sycl::minimum<int>(),
std::numeric_limits<int>::max());
test<class KernelNameMaximumI>(q, input, output, sycl::maximum<int>(),
std::numeric_limits<int>::lowest());

#ifdef SPIRV_1_3
test<class KernelNameMultipliesI>(q, input, output, sycl::multiplies<int>(),
1);
test<class KernelNameBitOrI>(q, input, output, sycl::bit_or<int>(), 0);
test<class KernelNameBitXorI>(q, input, output, sycl::bit_xor<int>(), 0);
test<class KernelNameBitAndI>(q, input, output, sycl::bit_and<int>(), ~0);
#endif // SPIRV_1_3

std::cout << "Test passed." << std::endl;
}
180 changes: 180 additions & 0 deletions SYCL/GroupAlgorithm/inclusive_scan_sycl2020.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
// UNSUPPORTED: cuda
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -I . -o %t13.out

#include "support.h"
#include <CL/sycl.hpp>
#include <algorithm>
#include <cassert>
#include <limits>
#include <numeric>
#include <vector>
using namespace sycl;

template <class SpecializationKernelName, int TestNumber>
class inclusive_scan_kernel;

// std::inclusive_scan isn't implemented yet, so use serial implementation
// instead
// TODO: use std::inclusive_scan when it will be supported
namespace emu {
template <typename InputIterator, typename OutputIterator,
class BinaryOperation, typename T>
OutputIterator inclusive_scan(InputIterator first, InputIterator last,
OutputIterator result, BinaryOperation binary_op,
T init) {
T partial = init;
for (InputIterator it = first; it != last; ++it) {
partial = binary_op(partial, *it);
*(result++) = partial;
}
return result;
}
} // namespace emu

template <typename SpecializationKernelName, typename InputContainer,
typename OutputContainer, class BinaryOperation>
void test(queue q, InputContainer input, OutputContainer output,
BinaryOperation binary_op,
typename OutputContainer::value_type identity) {
typedef typename InputContainer::value_type InputT;
typedef typename OutputContainer::value_type OutputT;
typedef class inclusive_scan_kernel<SpecializationKernelName, 0> kernel_name0;
constexpr size_t N = input.size();
constexpr size_t G = 64;
std::vector<OutputT> expected(N);

// checking
// template <typename Group, typename T, class BinaryOperation>
// T inclusive_scan_over_group(Group g, T x, BinaryOperation binary_op)
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name0>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
int lid = it.get_local_id(0);
out[lid] = inclusive_scan_over_group(g, in[lid], binary_op);
});
});
}
emu::inclusive_scan(input.begin(), input.begin() + G, expected.begin(),
binary_op, identity);
assert(std::equal(output.begin(), output.begin() + G, expected.begin()));

typedef class inclusive_scan_kernel<SpecializationKernelName, 1> kernel_name1;
constexpr OutputT init = 42;

// checking
// template <typename Group, typename V, class BinaryOperation, typename T>
// T inclusive_scan_over_group(Group g, V x, BinaryOperation binary_op, T
// init)
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name1>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
int lid = it.get_local_id(0);
out[lid] = inclusive_scan_over_group(g, in[lid], binary_op, init);
});
});
}
emu::inclusive_scan(input.begin(), input.begin() + G, expected.begin(),
binary_op, init);
assert(std::equal(output.begin(), output.begin() + G, expected.begin()));

typedef class inclusive_scan_kernel<SpecializationKernelName, 2> kernel_name2;

// checking
// template <typename Group, typename InPtr, typename OutPtr,
// class BinaryOperation>
// OutPtr joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr
// result,
// BinaryOperation binary_op)
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name2>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
joint_inclusive_scan(g, in.get_pointer(), in.get_pointer() + N,
out.get_pointer(), binary_op);
});
});
}
emu::inclusive_scan(input.begin(), input.begin() + N, expected.begin(),
binary_op, identity);
assert(std::equal(output.begin(), output.begin() + N, expected.begin()));

typedef class inclusive_scan_kernel<SpecializationKernelName, 3> kernel_name3;

// checking
// template <typename Group, typename InPtr, typename OutPtr,
// class BinaryOperation, typename T>
// OutPtr joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr
// result,
// BinaryOperation binary_op, T init)
{
buffer<InputT> in_buf(input.data(), input.size());
buffer<OutputT> out_buf(output.data(), output.size());
q.submit([&](handler &cgh) {
accessor in{in_buf, cgh, sycl::read_only};
accessor out{out_buf, cgh, sycl::write_only, sycl::no_init};
cgh.parallel_for<kernel_name3>(nd_range<1>(G, G), [=](nd_item<1> it) {
group<1> g = it.get_group();
joint_inclusive_scan(g, in.get_pointer(), in.get_pointer() + N,
out.get_pointer(), binary_op, init);
});
});
}
emu::inclusive_scan(input.begin(), input.begin() + N, expected.begin(),
binary_op, init);
assert(std::equal(output.begin(), output.begin() + N, expected.begin()));
}

int main() {
queue q;
if (!isSupportedDevice(q.get_device())) {
std::cout << "Skipping test\n";
return 0;
}

constexpr int N = 128;
std::array<int, N> input;
std::array<int, N> output;
std::iota(input.begin(), input.end(), 0);
std::fill(output.begin(), output.end(), 0);

test<class KernelNamePlusV>(q, input, output, sycl::plus<>(), 0);
test<class KernelNameMinimumV>(q, input, output, sycl::minimum<>(),
std::numeric_limits<int>::max());
test<class KernelNameMaximumV>(q, input, output, sycl::maximum<>(),
std::numeric_limits<int>::lowest());

test<class KernelNamePlusI>(q, input, output, sycl::plus<int>(), 0);
test<class KernelNameMinimumI>(q, input, output, sycl::minimum<int>(),
std::numeric_limits<int>::max());
test<class KernelNameMaximumI>(q, input, output, sycl::maximum<int>(),
std::numeric_limits<int>::lowest());

#ifdef SPIRV_1_3
test<class KernelNameMultipliesI>(q, input, output, sycl::multiplies<int>(),
1);
test<class KernelNameBitOrI>(q, input, output, sycl::bit_or<int>(), 0);
test<class KernelNameBitXorI>(q, input, output, sycl::bit_xor<int>(), 0);
test<class KernelNameBitAndI>(q, input, output, sycl::bit_and<int>(), ~0);
#endif // SPIRV_1_3

std::cout << "Test passed." << std::endl;
}
Loading