diff --git a/SYCL/GroupAlgorithm/exclusive_scan_sycl2020.cpp b/SYCL/GroupAlgorithm/exclusive_scan_sycl2020.cpp new file mode 100644 index 0000000000..5424cb9d31 --- /dev/null +++ b/SYCL/GroupAlgorithm/exclusive_scan_sycl2020.cpp @@ -0,0 +1,180 @@ +// UNSUPPORTED: cuda +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -I . -o %t13.out + +#include "support.h" +#include +#include +#include +#include +#include +#include +using namespace sycl; + +template +class exclusive_scan_kernel; + +// std::exclusive_scan isn't implemented yet, so use serial implementation +// instead +// TODO: use std::exclusive_scan when it will be supported +namespace emu { +template +OutputIterator exclusive_scan(InputIterator first, InputIterator last, + OutputIterator result, T init, + BinaryOperation binary_op) { + T partial = init; + for (InputIterator it = first; it != last; ++it) { + *(result++) = partial; + partial = binary_op(partial, *it); + } + return result; +} +} // namespace emu + +template +void test(queue q, InputContainer input, OutputContainer output, + BinaryOperation binary_op, + typename OutputContainer::value_type identity) { + typedef typename InputContainer::value_type InputT; + typedef typename OutputContainer::value_type OutputT; + typedef class exclusive_scan_kernel kernel_name0; + constexpr size_t G = 64; + constexpr size_t N = input.size(); + std::vector expected(N); + + // checking + // template + // T exclusive_scan_over_group(Group g, T x, BinaryOperation binary_op) + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + accessor in{in_buf, cgh, sycl::read_only}; + accessor out{out_buf, cgh, sycl::write_only, sycl::no_init}; + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[lid] = exclusive_scan_over_group(g, in[lid], binary_op); + }); + }); + } + emu::exclusive_scan(input.begin(), input.begin() + G, expected.begin(), + identity, binary_op); + assert(std::equal(output.begin(), output.begin() + G, expected.begin())); + + typedef class exclusive_scan_kernel kernel_name1; + constexpr OutputT init = 42; + + // checking + // template + // T exclusive_scan_over_group(Group g, V x, T init, BinaryOperation + // binary_op) + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + accessor in{in_buf, cgh, sycl::read_only}; + accessor out{out_buf, cgh, sycl::write_only, sycl::no_init}; + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[lid] = exclusive_scan_over_group(g, in[lid], init, binary_op); + }); + }); + } + emu::exclusive_scan(input.begin(), input.begin() + G, expected.begin(), init, + binary_op); + assert(std::equal(output.begin(), output.begin() + G, expected.begin())); + + typedef class exclusive_scan_kernel kernel_name2; + + // checking + // template + // OutPtr joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr + // result, + // BinaryOperation binary_op) + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + accessor in{in_buf, cgh, sycl::read_only}; + accessor out{out_buf, cgh, sycl::write_only, sycl::no_init}; + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + joint_exclusive_scan(g, in.get_pointer(), in.get_pointer() + N, + out.get_pointer(), binary_op); + }); + }); + } + emu::exclusive_scan(input.begin(), input.begin() + N, expected.begin(), + identity, binary_op); + assert(std::equal(output.begin(), output.begin() + N, expected.begin())); + + typedef class exclusive_scan_kernel kernel_name3; + + // checking + // template + // OutPtr joint_exclusive_scan(Group g, InPtr first, InPtr last, OutPtr + // result, T init, + // BinaryOperation binary_op) + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + accessor in{in_buf, cgh, sycl::read_only}; + accessor out{out_buf, cgh, sycl::write_only, sycl::no_init}; + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + joint_exclusive_scan(g, in.get_pointer(), in.get_pointer() + N, + out.get_pointer(), init, binary_op); + }); + }); + } + emu::exclusive_scan(input.begin(), input.begin() + N, expected.begin(), init, + binary_op); + assert(std::equal(output.begin(), output.begin() + N, expected.begin())); +} + +int main() { + queue q; + if (!isSupportedDevice(q.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 128; + std::array input; + std::array output; + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), 0); + + test(q, input, output, sycl::plus<>(), 0); + test(q, input, output, sycl::minimum<>(), + std::numeric_limits::max()); + test(q, input, output, sycl::maximum<>(), + std::numeric_limits::lowest()); + + test(q, input, output, sycl::plus(), 0); + test(q, input, output, sycl::minimum(), + std::numeric_limits::max()); + test(q, input, output, sycl::maximum(), + std::numeric_limits::lowest()); + +#ifdef SPIRV_1_3 + test(q, input, output, sycl::multiplies(), + 1); + test(q, input, output, sycl::bit_or(), 0); + test(q, input, output, sycl::bit_xor(), 0); + test(q, input, output, sycl::bit_and(), ~0); +#endif // SPIRV_1_3 + + std::cout << "Test passed." << std::endl; +} diff --git a/SYCL/GroupAlgorithm/inclusive_scan_sycl2020.cpp b/SYCL/GroupAlgorithm/inclusive_scan_sycl2020.cpp new file mode 100644 index 0000000000..c9c76e06fb --- /dev/null +++ b/SYCL/GroupAlgorithm/inclusive_scan_sycl2020.cpp @@ -0,0 +1,180 @@ +// UNSUPPORTED: cuda +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -I . -o %t13.out + +#include "support.h" +#include +#include +#include +#include +#include +#include +using namespace sycl; + +template +class inclusive_scan_kernel; + +// std::inclusive_scan isn't implemented yet, so use serial implementation +// instead +// TODO: use std::inclusive_scan when it will be supported +namespace emu { +template +OutputIterator inclusive_scan(InputIterator first, InputIterator last, + OutputIterator result, BinaryOperation binary_op, + T init) { + T partial = init; + for (InputIterator it = first; it != last; ++it) { + partial = binary_op(partial, *it); + *(result++) = partial; + } + return result; +} +} // namespace emu + +template +void test(queue q, InputContainer input, OutputContainer output, + BinaryOperation binary_op, + typename OutputContainer::value_type identity) { + typedef typename InputContainer::value_type InputT; + typedef typename OutputContainer::value_type OutputT; + typedef class inclusive_scan_kernel kernel_name0; + constexpr size_t N = input.size(); + constexpr size_t G = 64; + std::vector expected(N); + + // checking + // template + // T inclusive_scan_over_group(Group g, T x, BinaryOperation binary_op) + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + accessor in{in_buf, cgh, sycl::read_only}; + accessor out{out_buf, cgh, sycl::write_only, sycl::no_init}; + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[lid] = inclusive_scan_over_group(g, in[lid], binary_op); + }); + }); + } + emu::inclusive_scan(input.begin(), input.begin() + G, expected.begin(), + binary_op, identity); + assert(std::equal(output.begin(), output.begin() + G, expected.begin())); + + typedef class inclusive_scan_kernel kernel_name1; + constexpr OutputT init = 42; + + // checking + // template + // T inclusive_scan_over_group(Group g, V x, BinaryOperation binary_op, T + // init) + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + accessor in{in_buf, cgh, sycl::read_only}; + accessor out{out_buf, cgh, sycl::write_only, sycl::no_init}; + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[lid] = inclusive_scan_over_group(g, in[lid], binary_op, init); + }); + }); + } + emu::inclusive_scan(input.begin(), input.begin() + G, expected.begin(), + binary_op, init); + assert(std::equal(output.begin(), output.begin() + G, expected.begin())); + + typedef class inclusive_scan_kernel kernel_name2; + + // checking + // template + // OutPtr joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr + // result, + // BinaryOperation binary_op) + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + accessor in{in_buf, cgh, sycl::read_only}; + accessor out{out_buf, cgh, sycl::write_only, sycl::no_init}; + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + joint_inclusive_scan(g, in.get_pointer(), in.get_pointer() + N, + out.get_pointer(), binary_op); + }); + }); + } + emu::inclusive_scan(input.begin(), input.begin() + N, expected.begin(), + binary_op, identity); + assert(std::equal(output.begin(), output.begin() + N, expected.begin())); + + typedef class inclusive_scan_kernel kernel_name3; + + // checking + // template + // OutPtr joint_inclusive_scan(Group g, InPtr first, InPtr last, OutPtr + // result, + // BinaryOperation binary_op, T init) + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + accessor in{in_buf, cgh, sycl::read_only}; + accessor out{out_buf, cgh, sycl::write_only, sycl::no_init}; + cgh.parallel_for(nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + joint_inclusive_scan(g, in.get_pointer(), in.get_pointer() + N, + out.get_pointer(), binary_op, init); + }); + }); + } + emu::inclusive_scan(input.begin(), input.begin() + N, expected.begin(), + binary_op, init); + assert(std::equal(output.begin(), output.begin() + N, expected.begin())); +} + +int main() { + queue q; + if (!isSupportedDevice(q.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 128; + std::array input; + std::array output; + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), 0); + + test(q, input, output, sycl::plus<>(), 0); + test(q, input, output, sycl::minimum<>(), + std::numeric_limits::max()); + test(q, input, output, sycl::maximum<>(), + std::numeric_limits::lowest()); + + test(q, input, output, sycl::plus(), 0); + test(q, input, output, sycl::minimum(), + std::numeric_limits::max()); + test(q, input, output, sycl::maximum(), + std::numeric_limits::lowest()); + +#ifdef SPIRV_1_3 + test(q, input, output, sycl::multiplies(), + 1); + test(q, input, output, sycl::bit_or(), 0); + test(q, input, output, sycl::bit_xor(), 0); + test(q, input, output, sycl::bit_and(), ~0); +#endif // SPIRV_1_3 + + std::cout << "Test passed." << std::endl; +} diff --git a/SYCL/GroupAlgorithm/reduce_sycl2020.cpp b/SYCL/GroupAlgorithm/reduce_sycl2020.cpp new file mode 100644 index 0000000000..82259c7435 --- /dev/null +++ b/SYCL/GroupAlgorithm/reduce_sycl2020.cpp @@ -0,0 +1,93 @@ +// UNSUPPORTED: cuda +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DSPIRV_1_3 %s -I . -o %t13.out + +#include "support.h" +#include +#include +#include +#include +#include +using namespace sycl; + +template +void test(queue q, InputContainer input, OutputContainer output, + BinaryOperation binary_op, + typename OutputContainer::value_type identity) { + typedef typename InputContainer::value_type InputT; + typedef typename OutputContainer::value_type OutputT; + constexpr OutputT init = 42; + constexpr size_t N = input.size(); + constexpr size_t G = 64; + { + buffer in_buf(input.data(), input.size()); + buffer out_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { + accessor in{in_buf, cgh, sycl::read_only}; + accessor out{out_buf, cgh, sycl::write_only, sycl::no_init}; + cgh.parallel_for( + nd_range<1>(G, G), [=](nd_item<1> it) { + group<1> g = it.get_group(); + int lid = it.get_local_id(0); + out[0] = reduce_over_group(g, in[lid], binary_op); + out[1] = reduce_over_group(g, in[lid], init, binary_op); + out[2] = joint_reduce(g, in.get_pointer(), in.get_pointer() + N, + binary_op); + out[3] = joint_reduce(g, in.get_pointer(), in.get_pointer() + N, + init, binary_op); + }); + }); + } + // std::reduce is not implemented yet, so use std::accumulate instead + // TODO: use std::reduce when it will be supported + assert(output[0] == std::accumulate(input.begin(), input.begin() + G, + identity, binary_op)); + assert(output[1] == + std::accumulate(input.begin(), input.begin() + G, init, binary_op)); + assert(output[2] == + std::accumulate(input.begin(), input.end(), identity, binary_op)); + assert(output[3] == + std::accumulate(input.begin(), input.end(), init, binary_op)); +} + +int main() { + queue q; + if (!isSupportedDevice(q.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + + constexpr int N = 128; + std::array input; + std::array output; + std::iota(input.begin(), input.end(), 0); + std::fill(output.begin(), output.end(), 0); + + test(q, input, output, sycl::plus<>(), 0); + test(q, input, output, sycl::minimum<>(), + std::numeric_limits::max()); + test(q, input, output, sycl::maximum<>(), + std::numeric_limits::lowest()); + + test(q, input, output, sycl::plus(), 0); + test(q, input, output, sycl::minimum(), + std::numeric_limits::max()); + test(q, input, output, sycl::maximum(), + std::numeric_limits::lowest()); + +#ifdef SPIRV_1_3 + test(q, input, output, sycl::multiplies(), + 1); + test(q, input, output, sycl::bit_or(), 0); + test(q, input, output, sycl::bit_xor(), 0); + test(q, input, output, sycl::bit_and(), ~0); +#endif // SPIRV_1_3 + + std::cout << "Test passed." << std::endl; +}