diff --git a/SYCL/SubGroup/helper.hpp b/SYCL/SubGroup/helper.hpp index 9f4d29ad5e..964fb742bc 100644 --- a/SYCL/SubGroup/helper.hpp +++ b/SYCL/SubGroup/helper.hpp @@ -154,7 +154,7 @@ void exit_if_not_equal_vec(vec val, vec ref, const char *name) { } bool core_sg_supported(const device &Device) { - return (Device.has_extension("cl_khr_subgroups") || - Device.get_info().find(" 2.1") != - string_class::npos); + if (Device.has_extension("cl_khr_subgroups")) + return true; + return Device.get_info() >= "2.1"; } diff --git a/SYCL/SubGroup/reduce.hpp b/SYCL/SubGroup/reduce.hpp index f606dcf5e9..4d181fe140 100644 --- a/SYCL/SubGroup/reduce.hpp +++ b/SYCL/SubGroup/reduce.hpp @@ -88,7 +88,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { check_op, T>(Queue, T(0), ONEAPI::maximum(), true, G, L); -#if __cplusplus >= 201402L + // Transparent operator functors. check_op, T>(Queue, T(L), ONEAPI::plus<>(), false, G, L); @@ -107,5 +107,52 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { check_op< sycl_subgr, T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L); -#endif +} + +template +void check_mul(queue &Queue, size_t G = 256, size_t L = 4) { + check_op, T>( + Queue, T(G), ONEAPI::multiplies(), false, G, L); + check_op, T>( + Queue, T(1), ONEAPI::multiplies(), true, G, L); + + // Transparent operator functors. 
+ check_op, T>( + Queue, T(G), ONEAPI::multiplies<>(), false, G, L); + check_op, T>( + Queue, T(1), ONEAPI::multiplies<>(), true, G, L); +} + +template +void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) { + check_op, T>( + Queue, T(G), ONEAPI::bit_or(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_xor(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_and(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and(), true, G, L); + + // Transparent operator functors + check_op, T>( + Queue, T(G), ONEAPI::bit_or(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_xor(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_and(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and(), true, G, L); } diff --git a/SYCL/SubGroup/reduce_fp16.cpp b/SYCL/SubGroup/reduce_fp16.cpp index 1d6e249eb1..323f3e63b2 100644 --- a/SYCL/SubGroup/reduce_fp16.cpp +++ b/SYCL/SubGroup/reduce_fp16.cpp @@ -1,18 +1,14 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -///==---------- reduce_fp16.cpp - SYCL sub_group reduce test ----*- C++ -*--==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// + +// This test verifies the correct work of the sub-group algorithm reduce(). 
#include "reduce.hpp" int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp16")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/reduce_fp64.cpp b/SYCL/SubGroup/reduce_fp64.cpp index 2e4699d35d..78f7994466 100644 --- a/SYCL/SubGroup/reduce_fp64.cpp +++ b/SYCL/SubGroup/reduce_fp64.cpp @@ -7,19 +7,15 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -///==---------- reduce_fp64.cpp - SYCL sub_group reduce test ----*- C++ -*--==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// + +// This test verifies the correct work of the sub-group algorithm reduce(). #include "reduce.hpp" int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp64")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/reduce_spirv13.cpp b/SYCL/SubGroup/reduce_spirv13.cpp new file mode 100644 index 0000000000..82ff043ccf --- /dev/null +++ b/SYCL/SubGroup/reduce_spirv13.cpp @@ -0,0 +1,39 @@ +// UNSUPPORTED: cpu +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA + +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 reduce algorithm +// used with the operation MUL, bitwise OR, XOR, AND. 
+ +#include "reduce.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + return 0; +} diff --git a/SYCL/SubGroup/reduce_spirv13_fp16.cpp b/SYCL/SubGroup/reduce_spirv13_fp16.cpp new file mode 100644 index 0000000000..e60826e99f --- /dev/null +++ b/SYCL/SubGroup/reduce_spirv13_fp16.cpp @@ -0,0 +1,21 @@ +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 reduce algorithm +// used with MUL operation. + +#include "reduce.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp16")) { + std::cout << "Skipping test\n"; + return 0; + } + check_mul(Queue); + std::cout << "Test passed." << std::endl; + return 0; +} diff --git a/SYCL/SubGroup/reduce_spirv13_fp64.cpp b/SYCL/SubGroup/reduce_spirv13_fp64.cpp new file mode 100644 index 0000000000..79ea2e1939 --- /dev/null +++ b/SYCL/SubGroup/reduce_spirv13_fp64.cpp @@ -0,0 +1,28 @@ +// UNSUPPORTED: cpu +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA + +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 reduce algorithm +// used with MUL operation. 
+ +#include "reduce.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp64")) { + std::cout << "Skipping test\n"; + return 0; + } + check_mul(Queue); + std::cout << "Test passed." << std::endl; + return 0; +} diff --git a/SYCL/SubGroup/scan.hpp b/SYCL/SubGroup/scan.hpp index c9d630dea2..3597eb2ba5 100644 --- a/SYCL/SubGroup/scan.hpp +++ b/SYCL/SubGroup/scan.hpp @@ -115,7 +115,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { Queue, std::numeric_limits::min(), ONEAPI::maximum(), true, G, L); } -#if __cplusplus >= 201402L + // Transparent operator functors. check_op, T>( Queue, T(L), ONEAPI::plus<>(), false, G, L); check_op, T>( @@ -150,5 +150,51 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { T>(Queue, std::numeric_limits::min(), ONEAPI::maximum<>(), true, G, L); } -#endif +} + +template +void check_mul(queue &Queue, size_t G = 256, size_t L = 4) { + check_op, T>( + Queue, T(L), ONEAPI::multiplies(), false, G, L); + check_op, T>( + Queue, T(1), ONEAPI::multiplies<>(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::multiplies(), false, G, L); + check_op, T>( + Queue, T(1), ONEAPI::multiplies<>(), true, G, L); +} + +template +void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) { + check_op, T>( + Queue, T(L), ONEAPI::bit_or(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_xor(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_and(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and(), true, G, L); + + // Transparent operator functors. 
+ check_op, T>( + Queue, T(L), ONEAPI::bit_or<>(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or<>(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_xor<>(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor<>(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_and<>(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and<>(), true, G, L); } diff --git a/SYCL/SubGroup/scan_fp16.cpp b/SYCL/SubGroup/scan_fp16.cpp index 47bd49b6a0..dc73279f50 100644 --- a/SYCL/SubGroup/scan_fp16.cpp +++ b/SYCL/SubGroup/scan_fp16.cpp @@ -1,19 +1,15 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -//==---------- scan_fp16.cpp - SYCL sub_group scan test --------*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// +// This test verifies the correct work of the sub-group algorithms +// exclusive_scan() and inclusive_scan(). #include "scan.hpp" int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp16")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/scan_fp64.cpp b/SYCL/SubGroup/scan_fp64.cpp index 07409b3e82..14b1383f69 100644 --- a/SYCL/SubGroup/scan_fp64.cpp +++ b/SYCL/SubGroup/scan_fp64.cpp @@ -8,19 +8,15 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -//==---------- scan_fp64.cpp - SYCL sub_group scan test --------*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// +// This test verifies the correct work of the sub-group algorithms +// exclusive_scan() and inclusive_scan(). #include "scan.hpp" int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp64")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/scan_spirv13.cpp b/SYCL/SubGroup/scan_spirv13.cpp new file mode 100644 index 0000000000..36e484754e --- /dev/null +++ b/SYCL/SubGroup/scan_spirv13.cpp @@ -0,0 +1,39 @@ +// UNSUPPORTED: cpu +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA + +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 exclusive_scan() and +// inclusive_scan() algorithms used with the operation MUL, bitwise OR, XOR, AND. + +#include "scan.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + std::cout << "Test passed." 
<< std::endl; + return 0; +} diff --git a/SYCL/SubGroup/scan_spirv13_fp16.cpp b/SYCL/SubGroup/scan_spirv13_fp16.cpp new file mode 100644 index 0000000000..62265ab8c0 --- /dev/null +++ b/SYCL/SubGroup/scan_spirv13_fp16.cpp @@ -0,0 +1,21 @@ +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 exclusive_scan() and +// inclusive_scan() algorithms used with the MUL operation. + +#include "scan.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp16")) { + std::cout << "Skipping test\n"; + return 0; + } + check_mul(Queue); + std::cout << "Test passed." << std::endl; + return 0; +} diff --git a/SYCL/SubGroup/scan_spirv13_fp64.cpp b/SYCL/SubGroup/scan_spirv13_fp64.cpp new file mode 100644 index 0000000000..c1bcbed831 --- /dev/null +++ b/SYCL/SubGroup/scan_spirv13_fp64.cpp @@ -0,0 +1,28 @@ +// UNSUPPORTED: cpu +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA + +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 exclusive_scan() and +// inclusive_scan() algorithms used with the MUL operation. + +#include "scan.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp64")) { + std::cout << "Skipping test\n"; + return 0; + } + check(Queue); + std::cout << "Test passed." << std::endl; + return 0; +}