From c938a3da5b081821f99dd56ed85845c883956fd9 Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Thu, 25 Feb 2021 20:02:34 -0800 Subject: [PATCH 1/4] [SYCL] Add test cases for multiplies,bit_or,bit_xor,bit_and subgroup algorithms These new test cases verify https://github.com/intel/llvm/pull/3267 Signed-off-by: Vyacheslav N Klochkov --- SYCL/SubGroup/reduce.hpp | 16 +++++++- SYCL/SubGroup/reduce_bit_ops.cpp | 64 +++++++++++++++++++++++++++++++ SYCL/SubGroup/scan.hpp | 16 +++++++- SYCL/SubGroup/scan_bit_ops.cpp | 65 ++++++++++++++++++++++++++++++++ 4 files changed, 157 insertions(+), 4 deletions(-) create mode 100644 SYCL/SubGroup/reduce_bit_ops.cpp create mode 100644 SYCL/SubGroup/scan_bit_ops.cpp diff --git a/SYCL/SubGroup/reduce.hpp b/SYCL/SubGroup/reduce.hpp index f606dcf5e9..adb77041c0 100644 --- a/SYCL/SubGroup/reduce.hpp +++ b/SYCL/SubGroup/reduce.hpp @@ -88,7 +88,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { check_op, T>(Queue, T(0), ONEAPI::maximum(), true, G, L); -#if __cplusplus >= 201402L + // Transparent operator functors. check_op, T>(Queue, T(L), ONEAPI::plus<>(), false, G, L); @@ -107,5 +107,17 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { check_op< sycl_subgr, T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L); -#endif + + // Use small sub-groups to avoid overflow effects for int multiply operations + // and avoid rounding issues for FP multiply. 
+ L = 4; + check_op, T>( + Queue, T(G), ONEAPI::multiplies(), false, G, L); + check_op, T>( + Queue, T(1), ONEAPI::multiplies(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::multiplies<>(), false, G, L); + check_op, T>( + Queue, T(1), ONEAPI::multiplies<>(), true, G, L); } diff --git a/SYCL/SubGroup/reduce_bit_ops.cpp b/SYCL/SubGroup/reduce_bit_ops.cpp new file mode 100644 index 0000000000..04ef923683 --- /dev/null +++ b/SYCL/SubGroup/reduce_bit_ops.cpp @@ -0,0 +1,64 @@ +// UNSUPPORTED: cpu +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA +// +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies correct handling of reduce() algorithm used with +// integer bitwise OR, XOR, AND operations. + +#include "reduce.hpp" + +template +void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) { + check_op, T>( + Queue, T(G), ONEAPI::bit_or(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_xor(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_and(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and(), true, G, L); + + // Transparent operator functors + check_op, T>( + Queue, T(G), ONEAPI::bit_or(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_xor(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_and(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and(), true, G, L); +} + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + 
check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + return 0; +} diff --git a/SYCL/SubGroup/scan.hpp b/SYCL/SubGroup/scan.hpp index c9d630dea2..ee944079db 100644 --- a/SYCL/SubGroup/scan.hpp +++ b/SYCL/SubGroup/scan.hpp @@ -115,7 +115,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { Queue, std::numeric_limits::min(), ONEAPI::maximum(), true, G, L); } -#if __cplusplus >= 201402L + // Transparent operator functors. check_op, T>( Queue, T(L), ONEAPI::plus<>(), false, G, L); check_op, T>( @@ -150,5 +150,17 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { T>(Queue, std::numeric_limits::min(), ONEAPI::maximum<>(), true, G, L); } -#endif + + // Use small sub-groups to avoid overflow effects for int multiply operations + // and avoid rounding issues for FP multiply. + L = 4; + check_op, T>( + Queue, T(L), ONEAPI::multiplies(), false, G, L); + check_op, T>( + Queue, T(1), ONEAPI::multiplies<>(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::multiplies(), false, G, L); + check_op, T>( + Queue, T(1), ONEAPI::multiplies<>(), true, G, L); } diff --git a/SYCL/SubGroup/scan_bit_ops.cpp b/SYCL/SubGroup/scan_bit_ops.cpp new file mode 100644 index 0000000000..317edc0cd3 --- /dev/null +++ b/SYCL/SubGroup/scan_bit_ops.cpp @@ -0,0 +1,65 @@ +// UNSUPPORTED: cpu +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA +// +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies correct handling of exclusive_scan and inclusive_scan +// sub-group algorithm used with integer bitwise OR, XOR, AND operations. 
+ +#include "scan.hpp" + +template +void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) { + check_op, T>( + Queue, T(L), ONEAPI::bit_or(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_xor(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_and(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and(), true, G, L); + + // Transparent operator functors. + check_op, T>( + Queue, T(L), ONEAPI::bit_or<>(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or<>(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_xor<>(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor<>(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_and<>(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and<>(), true, G, L); +} + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + std::cout << "Test passed." 
<< std::endl; + return 0; +} From c3f7aeb251a3a24b9071bb8a65aae14fceb90770 Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Mon, 1 Mar 2021 13:00:42 -0800 Subject: [PATCH 2/4] Additional fixes for CUDA: move spir-v 1.3 test cases to separate files Signed-off-by: Vyacheslav N Klochkov --- SYCL/SubGroup/reduce.hpp | 41 +++++++++++++++-- SYCL/SubGroup/reduce_bit_ops.cpp | 64 -------------------------- SYCL/SubGroup/reduce_spirv13.cpp | 39 ++++++++++++++++ SYCL/SubGroup/reduce_spirv13_fp16.cpp | 20 +++++++++ SYCL/SubGroup/reduce_spirv13_fp64.cpp | 27 +++++++++++ SYCL/SubGroup/scan.hpp | 40 +++++++++++++++-- SYCL/SubGroup/scan_bit_ops.cpp | 65 --------------------------- SYCL/SubGroup/scan_spirv13.cpp | 39 ++++++++++++++++ SYCL/SubGroup/scan_spirv13_fp16.cpp | 20 +++++++++ SYCL/SubGroup/scan_spirv13_fp64.cpp | 27 +++++++++++ 10 files changed, 247 insertions(+), 135 deletions(-) delete mode 100644 SYCL/SubGroup/reduce_bit_ops.cpp create mode 100644 SYCL/SubGroup/reduce_spirv13.cpp create mode 100644 SYCL/SubGroup/reduce_spirv13_fp16.cpp create mode 100644 SYCL/SubGroup/reduce_spirv13_fp64.cpp delete mode 100644 SYCL/SubGroup/scan_bit_ops.cpp create mode 100644 SYCL/SubGroup/scan_spirv13.cpp create mode 100644 SYCL/SubGroup/scan_spirv13_fp16.cpp create mode 100644 SYCL/SubGroup/scan_spirv13_fp64.cpp diff --git a/SYCL/SubGroup/reduce.hpp b/SYCL/SubGroup/reduce.hpp index adb77041c0..4d181fe140 100644 --- a/SYCL/SubGroup/reduce.hpp +++ b/SYCL/SubGroup/reduce.hpp @@ -107,17 +107,52 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { check_op< sycl_subgr, T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L); +} - // Use small sub-groups to avoid overflow effects for int multiply operations - // and avoid rounding issues for FP multiply. 
- L = 4; +template +void check_mul(queue &Queue, size_t G = 256, size_t L = 4) { check_op, T>( Queue, T(G), ONEAPI::multiplies(), false, G, L); check_op, T>( Queue, T(1), ONEAPI::multiplies(), true, G, L); + // Transparent operator functors. check_op, T>( Queue, T(G), ONEAPI::multiplies<>(), false, G, L); check_op, T>( Queue, T(1), ONEAPI::multiplies<>(), true, G, L); } + +template +void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) { + check_op, T>( + Queue, T(G), ONEAPI::bit_or(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_xor(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_and(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and(), true, G, L); + + // Transparent operator functors + check_op, T>( + Queue, T(G), ONEAPI::bit_or(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_xor(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor(), true, G, L); + + check_op, T>( + Queue, T(G), ONEAPI::bit_and(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and(), true, G, L); +} diff --git a/SYCL/SubGroup/reduce_bit_ops.cpp b/SYCL/SubGroup/reduce_bit_ops.cpp deleted file mode 100644 index 04ef923683..0000000000 --- a/SYCL/SubGroup/reduce_bit_ops.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// UNSUPPORTED: cpu -// #2252 Disable until all variants of built-ins are available in OpenCL CPU -// runtime for every supported ISA -// -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %ACC_RUN_PLACEHOLDER %t.out - -// This test verifies correct handling of reduce() algorithm used with -// integer bitwise OR, XOR, AND operations. 
- -#include "reduce.hpp" - -template -void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) { - check_op, T>( - Queue, T(G), ONEAPI::bit_or(), false, G, L); - check_op, T>( - Queue, T(0), ONEAPI::bit_or(), true, G, L); - - check_op, T>( - Queue, T(G), ONEAPI::bit_xor(), false, G, L); - check_op, T>( - Queue, T(0), ONEAPI::bit_xor(), true, G, L); - - check_op, T>( - Queue, T(G), ONEAPI::bit_and(), false, G, L); - check_op, T>( - Queue, ~T(0), ONEAPI::bit_and(), true, G, L); - - // Transparent operator functors - check_op, T>( - Queue, T(G), ONEAPI::bit_or(), false, G, L); - check_op, T>( - Queue, T(0), ONEAPI::bit_or(), true, G, L); - - check_op, T>( - Queue, T(G), ONEAPI::bit_xor(), false, G, L); - check_op, T>( - Queue, T(0), ONEAPI::bit_xor(), true, G, L); - - check_op, T>( - Queue, T(G), ONEAPI::bit_and(), false, G, L); - check_op, T>( - Queue, ~T(0), ONEAPI::bit_and(), true, G, L); -} - -int main() { - queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } - check_bit_ops(Queue); - check_bit_ops(Queue); - check_bit_ops(Queue); - check_bit_ops(Queue); - check_bit_ops(Queue); - check_bit_ops(Queue); - check_bit_ops(Queue); - return 0; -} diff --git a/SYCL/SubGroup/reduce_spirv13.cpp b/SYCL/SubGroup/reduce_spirv13.cpp new file mode 100644 index 0000000000..82ff043ccf --- /dev/null +++ b/SYCL/SubGroup/reduce_spirv13.cpp @@ -0,0 +1,39 @@ +// UNSUPPORTED: cpu +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA + +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 reduce algorithm +// used with the operation MUL, bitwise OR, XOR, AND. 
+ +#include "reduce.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + return 0; +} diff --git a/SYCL/SubGroup/reduce_spirv13_fp16.cpp b/SYCL/SubGroup/reduce_spirv13_fp16.cpp new file mode 100644 index 0000000000..6bce6cad09 --- /dev/null +++ b/SYCL/SubGroup/reduce_spirv13_fp16.cpp @@ -0,0 +1,20 @@ +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 reduce algorithm +// used with MUL operation. + +#include "reduce.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + check_mul(Queue); + std::cout << "Test passed." << std::endl; + return 0; +} diff --git a/SYCL/SubGroup/reduce_spirv13_fp64.cpp b/SYCL/SubGroup/reduce_spirv13_fp64.cpp new file mode 100644 index 0000000000..9c4591cefe --- /dev/null +++ b/SYCL/SubGroup/reduce_spirv13_fp64.cpp @@ -0,0 +1,27 @@ +// UNSUPPORTED: cpu +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA + +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 reduce algorithm +// used with MUL operation. 
+ +#include "reduce.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + check_mul(Queue); + std::cout << "Test passed." << std::endl; + return 0; +} diff --git a/SYCL/SubGroup/scan.hpp b/SYCL/SubGroup/scan.hpp index ee944079db..3597eb2ba5 100644 --- a/SYCL/SubGroup/scan.hpp +++ b/SYCL/SubGroup/scan.hpp @@ -150,10 +150,10 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { T>(Queue, std::numeric_limits::min(), ONEAPI::maximum<>(), true, G, L); } +} - // Use small sub-groups to avoid overflow effects for int multiply operations - // and avoid rounding issues for FP multiply. - L = 4; +template +void check_mul(queue &Queue, size_t G = 256, size_t L = 4) { check_op, T>( Queue, T(L), ONEAPI::multiplies(), false, G, L); check_op, T>( @@ -164,3 +164,37 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) { check_op, T>( Queue, T(1), ONEAPI::multiplies<>(), true, G, L); } + +template +void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) { + check_op, T>( + Queue, T(L), ONEAPI::bit_or(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_xor(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_and(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and(), true, G, L); + + // Transparent operator functors. 
+ check_op, T>( + Queue, T(L), ONEAPI::bit_or<>(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_or<>(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_xor<>(), false, G, L); + check_op, T>( + Queue, T(0), ONEAPI::bit_xor<>(), true, G, L); + + check_op, T>( + Queue, T(L), ONEAPI::bit_and<>(), false, G, L); + check_op, T>( + Queue, ~T(0), ONEAPI::bit_and<>(), true, G, L); +} diff --git a/SYCL/SubGroup/scan_bit_ops.cpp b/SYCL/SubGroup/scan_bit_ops.cpp deleted file mode 100644 index 317edc0cd3..0000000000 --- a/SYCL/SubGroup/scan_bit_ops.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// UNSUPPORTED: cpu -// #2252 Disable until all variants of built-ins are available in OpenCL CPU -// runtime for every supported ISA -// -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: %HOST_RUN_PLACEHOLDER %t.out -// RUN: %CPU_RUN_PLACEHOLDER %t.out -// RUN: %GPU_RUN_PLACEHOLDER %t.out -// RUN: %ACC_RUN_PLACEHOLDER %t.out - -// This test verifies correct handling of exclusive_scan and inclusive_scan -// sub-group algorithm used with integer bitwise OR, XOR, AND operations. - -#include "scan.hpp" - -template -void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) { - check_op, T>( - Queue, T(L), ONEAPI::bit_or(), false, G, L); - check_op, T>( - Queue, T(0), ONEAPI::bit_or(), true, G, L); - - check_op, T>( - Queue, T(L), ONEAPI::bit_xor(), false, G, L); - check_op, T>( - Queue, T(0), ONEAPI::bit_xor(), true, G, L); - - check_op, T>( - Queue, T(L), ONEAPI::bit_and(), false, G, L); - check_op, T>( - Queue, ~T(0), ONEAPI::bit_and(), true, G, L); - - // Transparent operator functors. 
- check_op, T>( - Queue, T(L), ONEAPI::bit_or<>(), false, G, L); - check_op, T>( - Queue, T(0), ONEAPI::bit_or<>(), true, G, L); - - check_op, T>( - Queue, T(L), ONEAPI::bit_xor<>(), false, G, L); - check_op, T>( - Queue, T(0), ONEAPI::bit_xor<>(), true, G, L); - - check_op, T>( - Queue, T(L), ONEAPI::bit_and<>(), false, G, L); - check_op, T>( - Queue, ~T(0), ONEAPI::bit_and<>(), true, G, L); -} - -int main() { - queue Queue; - if (!core_sg_supported(Queue.get_device())) { - std::cout << "Skipping test\n"; - return 0; - } - check_bit_ops(Queue); - check_bit_ops(Queue); - check_bit_ops(Queue); - check_bit_ops(Queue); - check_bit_ops(Queue); - check_bit_ops(Queue); - check_bit_ops(Queue); - std::cout << "Test passed." << std::endl; - return 0; -} diff --git a/SYCL/SubGroup/scan_spirv13.cpp b/SYCL/SubGroup/scan_spirv13.cpp new file mode 100644 index 0000000000..36e484754e --- /dev/null +++ b/SYCL/SubGroup/scan_spirv13.cpp @@ -0,0 +1,39 @@ +// UNSUPPORTED: cpu +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA + +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 exclusive_scan() and +// inclusive_scan() algorithms used with the operation MUL, bitwise OR, XOR, AND. + +#include "scan.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + check_mul(Queue); + + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + check_bit_ops(Queue); + std::cout << "Test passed." 
<< std::endl; + return 0; +} diff --git a/SYCL/SubGroup/scan_spirv13_fp16.cpp b/SYCL/SubGroup/scan_spirv13_fp16.cpp new file mode 100644 index 0000000000..0532d06b8a --- /dev/null +++ b/SYCL/SubGroup/scan_spirv13_fp16.cpp @@ -0,0 +1,20 @@ +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 exclusive_scan() and +// inclusive_scan() algorithms used with the MUL operation. + +#include "scan.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + check_mul(Queue); + std::cout << "Test passed." << std::endl; + return 0; +} diff --git a/SYCL/SubGroup/scan_spirv13_fp64.cpp b/SYCL/SubGroup/scan_spirv13_fp64.cpp new file mode 100644 index 0000000000..4d4102127a --- /dev/null +++ b/SYCL/SubGroup/scan_spirv13_fp64.cpp @@ -0,0 +1,27 @@ +// UNSUPPORTED: cpu +// #2252 Disable until all variants of built-ins are available in OpenCL CPU +// runtime for every supported ISA + +// UNSUPPORTED: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %HOST_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test verifies the correct work of SPIR-V 1.3 exclusive_scan() and +// inclusive_scan() algorithms used with the MUL operation. + +#include "scan.hpp" + +int main() { + queue Queue; + if (!core_sg_supported(Queue.get_device())) { + std::cout << "Skipping test\n"; + return 0; + } + check(Queue); + std::cout << "Test passed." << std::endl; + return 0; +} From ac90159911f8c71b75f4f2e18482789ef7f7ca7e Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Mon, 1 Mar 2021 13:29:57 -0800 Subject: [PATCH 3/4] Fix the check for sub-groups availability. 
It returned false even when device had support for sub-groups Signed-off-by: Vyacheslav N Klochkov --- SYCL/SubGroup/helper.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SYCL/SubGroup/helper.hpp b/SYCL/SubGroup/helper.hpp index 9f4d29ad5e..964fb742bc 100644 --- a/SYCL/SubGroup/helper.hpp +++ b/SYCL/SubGroup/helper.hpp @@ -154,7 +154,7 @@ void exit_if_not_equal_vec(vec val, vec ref, const char *name) { } bool core_sg_supported(const device &Device) { - return (Device.has_extension("cl_khr_subgroups") || - Device.get_info().find(" 2.1") != - string_class::npos); + if (Device.has_extension("cl_khr_subgroups")) + return true; + return Device.get_info() >= "2.1"; } From 925ce426bb754423531f70b26482e9c98005857a Mon Sep 17 00:00:00 2001 From: Vyacheslav N Klochkov Date: Tue, 2 Mar 2021 09:10:08 -0800 Subject: [PATCH 4/4] Add checks for cl_khr_fp16/fp64 Signed-off-by: Vyacheslav N Klochkov --- SYCL/SubGroup/reduce_fp16.cpp | 12 ++++-------- SYCL/SubGroup/reduce_fp64.cpp | 12 ++++-------- SYCL/SubGroup/reduce_spirv13_fp16.cpp | 3 ++- SYCL/SubGroup/reduce_spirv13_fp64.cpp | 3 ++- SYCL/SubGroup/scan_fp16.cpp | 12 ++++-------- SYCL/SubGroup/scan_fp64.cpp | 12 ++++-------- SYCL/SubGroup/scan_spirv13_fp16.cpp | 3 ++- SYCL/SubGroup/scan_spirv13_fp64.cpp | 3 ++- 8 files changed, 24 insertions(+), 36 deletions(-) diff --git a/SYCL/SubGroup/reduce_fp16.cpp b/SYCL/SubGroup/reduce_fp16.cpp index 1d6e249eb1..323f3e63b2 100644 --- a/SYCL/SubGroup/reduce_fp16.cpp +++ b/SYCL/SubGroup/reduce_fp16.cpp @@ -1,18 +1,14 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -///==---------- reduce_fp16.cpp - SYCL sub_group reduce test ----*- C++ -*--==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// + +// This test verifies the correct work of the sub-group algorithm reduce(). #include "reduce.hpp" int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp16")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/reduce_fp64.cpp b/SYCL/SubGroup/reduce_fp64.cpp index 2e4699d35d..78f7994466 100644 --- a/SYCL/SubGroup/reduce_fp64.cpp +++ b/SYCL/SubGroup/reduce_fp64.cpp @@ -7,19 +7,15 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -///==---------- reduce_fp64.cpp - SYCL sub_group reduce test ----*- C++ -*--==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// + +// This test verifies the correct work of the sub-group algorithm reduce(). 
#include "reduce.hpp" int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp64")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/reduce_spirv13_fp16.cpp b/SYCL/SubGroup/reduce_spirv13_fp16.cpp index 6bce6cad09..e60826e99f 100644 --- a/SYCL/SubGroup/reduce_spirv13_fp16.cpp +++ b/SYCL/SubGroup/reduce_spirv13_fp16.cpp @@ -10,7 +10,8 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp16")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/reduce_spirv13_fp64.cpp b/SYCL/SubGroup/reduce_spirv13_fp64.cpp index 9c4591cefe..79ea2e1939 100644 --- a/SYCL/SubGroup/reduce_spirv13_fp64.cpp +++ b/SYCL/SubGroup/reduce_spirv13_fp64.cpp @@ -17,7 +17,8 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp64")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/scan_fp16.cpp b/SYCL/SubGroup/scan_fp16.cpp index 47bd49b6a0..dc73279f50 100644 --- a/SYCL/SubGroup/scan_fp16.cpp +++ b/SYCL/SubGroup/scan_fp16.cpp @@ -1,19 +1,15 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out -//==---------- scan_fp16.cpp - SYCL sub_group scan test --------*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// +// This test verifies the correct work of the sub-group algorithms +// exclusive_scan() and inclusive_scan(). 
#include "scan.hpp" int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp16")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/scan_fp64.cpp b/SYCL/SubGroup/scan_fp64.cpp index 07409b3e82..14b1383f69 100644 --- a/SYCL/SubGroup/scan_fp64.cpp +++ b/SYCL/SubGroup/scan_fp64.cpp @@ -8,19 +8,15 @@ // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -//==---------- scan_fp64.cpp - SYCL sub_group scan test --------*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// +// This test verifies the correct work of the sub-group algorithms +// exclusive_scan() and inclusive_scan(). #include "scan.hpp" int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp64")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/scan_spirv13_fp16.cpp b/SYCL/SubGroup/scan_spirv13_fp16.cpp index 0532d06b8a..62265ab8c0 100644 --- a/SYCL/SubGroup/scan_spirv13_fp16.cpp +++ b/SYCL/SubGroup/scan_spirv13_fp16.cpp @@ -10,7 +10,8 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if (!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp16")) { std::cout << "Skipping test\n"; return 0; } diff --git a/SYCL/SubGroup/scan_spirv13_fp64.cpp b/SYCL/SubGroup/scan_spirv13_fp64.cpp index 4d4102127a..c1bcbed831 100644 --- a/SYCL/SubGroup/scan_spirv13_fp64.cpp +++ b/SYCL/SubGroup/scan_spirv13_fp64.cpp @@ -17,7 +17,8 @@ int main() { queue Queue; - if (!core_sg_supported(Queue.get_device())) { + if 
(!core_sg_supported(Queue.get_device()) || + !Queue.get_device().has_extension("cl_khr_fp64")) { std::cout << "Skipping test\n"; return 0; }