This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Add test cases for multiplies,bit_or,bit_xor,bit_and subgroup … #158

Merged 4 commits on Mar 4, 2021
6 changes: 3 additions & 3 deletions SYCL/SubGroup/helper.hpp
@@ -154,7 +154,7 @@ void exit_if_not_equal_vec(vec<T, N> val, vec<T, N> ref, const char *name) {
}

bool core_sg_supported(const device &Device) {
-  return (Device.has_extension("cl_khr_subgroups") ||
-          Device.get_info<info::device::version>().find(" 2.1") !=
-              string_class::npos);
+  if (Device.has_extension("cl_khr_subgroups"))
+    return true;
+  return Device.get_info<info::device::version>() >= "2.1";
}
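
Note: the rewritten guard swaps a substring search for an ordered comparison on the version string. A minimal host-side sketch of that semantics, assuming the device reports a plain "major.minor" version string (which is what the comparison presumes):

#include <cassert>
#include <string>

int main() {
  // Lexicographic comparison on "major.minor": 2.1 and newer report core
  // sub-group support; older versions fall back to the extension check.
  assert(std::string("2.1") >= "2.1");
  assert(std::string("3.0") >= "2.1");
  assert(std::string("1.2") < "2.1");
  return 0;
}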
51 changes: 49 additions & 2 deletions SYCL/SubGroup/reduce.hpp
@@ -88,7 +88,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
check_op<sycl_subgr<SpecializationKernelName, class KernelName_bPPlfvdGShi>,
T>(Queue, T(0), ONEAPI::maximum<T>(), true, G, L);

-#if __cplusplus >= 201402L
+  // Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName,
class KernelName_fkOyLRYirfMnvBcnbRFy>,
T>(Queue, T(L), ONEAPI::plus<>(), false, G, L);
@@ -107,5 +107,52 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
check_op<
sycl_subgr<SpecializationKernelName, class KernelName_BaCGaWDMFeMFqvotbk>,
T>(Queue, T(0), ONEAPI::maximum<>(), true, G, L);
-#endif
}

template <typename SpecializationKernelName, typename T>
void check_mul(queue &Queue, size_t G = 256, size_t L = 4) {
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulF>, T>(
Queue, T(G), ONEAPI::multiplies<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulT>, T>(
Queue, T(1), ONEAPI::multiplies<T>(), true, G, L);

// Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulFV>, T>(
Queue, T(G), ONEAPI::multiplies<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulTV>, T>(
Queue, T(1), ONEAPI::multiplies<>(), true, G, L);
}

template <typename SpecializationKernelName, typename T>
void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) {
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORF>, T>(
Queue, T(G), ONEAPI::bit_or<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORT>, T>(
Queue, T(0), ONEAPI::bit_or<T>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORF>, T>(
Queue, T(G), ONEAPI::bit_xor<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORT>, T>(
Queue, T(0), ONEAPI::bit_xor<T>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDF>, T>(
Queue, T(G), ONEAPI::bit_and<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDT>, T>(
Queue, ~T(0), ONEAPI::bit_and<T>(), true, G, L);

// Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORFV>, T>(
Queue, T(G), ONEAPI::bit_or<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORTV>, T>(
Queue, T(0), ONEAPI::bit_or<>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORFV>, T>(
Queue, T(G), ONEAPI::bit_xor<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORTV>, T>(
Queue, T(0), ONEAPI::bit_xor<>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDFV>, T>(
Queue, T(G), ONEAPI::bit_and<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDTV>, T>(
Queue, ~T(0), ONEAPI::bit_and<>(), true, G, L);
}
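
The init values passed to check_op above are the identity elements of each operation, chosen so that folding the init into the result leaves it unchanged. A quick host-side check of those identities (plain C++, independent of any SYCL API):

#include <cassert>

int main() {
  unsigned x = 0xA5u;
  assert(x * 1u == x);    // multiplies: identity 1,  hence T(1)
  assert((x | 0u) == x);  // bit_or:     identity 0,  hence T(0)
  assert((x ^ 0u) == x);  // bit_xor:    identity 0,  hence T(0)
  assert((x & ~0u) == x); // bit_and:    identity ~0, hence ~T(0)
  return 0;
}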
12 changes: 4 additions & 8 deletions SYCL/SubGroup/reduce_fp16.cpp
@@ -1,18 +1,14 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
-///==---------- reduce_fp16.cpp - SYCL sub_group reduce test ----*- C++ -*--==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//

// This test verifies the correct work of the sub-group algorithm reduce().

#include "reduce.hpp"

int main() {
queue Queue;
-  if (!core_sg_supported(Queue.get_device())) {
+  if (!core_sg_supported(Queue.get_device()) ||
+      !Queue.get_device().has_extension("cl_khr_fp16")) {
std::cout << "Skipping test\n";
return 0;
}
12 changes: 4 additions & 8 deletions SYCL/SubGroup/reduce_fp64.cpp
@@ -7,19 +7,15 @@
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out
-///==---------- reduce_fp64.cpp - SYCL sub_group reduce test ----*- C++ -*--==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//

// This test verifies the correct work of the sub-group algorithm reduce().

#include "reduce.hpp"

int main() {
queue Queue;
-  if (!core_sg_supported(Queue.get_device())) {
+  if (!core_sg_supported(Queue.get_device()) ||
+      !Queue.get_device().has_extension("cl_khr_fp64")) {
std::cout << "Skipping test\n";
return 0;
}
39 changes: 39 additions & 0 deletions SYCL/SubGroup/reduce_spirv13.cpp
@@ -0,0 +1,39 @@
// UNSUPPORTED: cpu
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA

// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// This test verifies the correct work of the SPIR-V 1.3 reduce algorithm
// used with the MUL, bitwise OR, XOR, and AND operations.

#include "reduce.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device())) {
std::cout << "Skipping test\n";
return 0;
}

check_mul<class MulA, int>(Queue);
check_mul<class MulB, unsigned int>(Queue);
check_mul<class MulC, long>(Queue);
check_mul<class MulD, unsigned long>(Queue);
check_mul<class MulE, float>(Queue);

check_bit_ops<class A, int>(Queue);
check_bit_ops<class B, unsigned int>(Queue);
check_bit_ops<class C, unsigned>(Queue);
check_bit_ops<class D, long>(Queue);
check_bit_ops<class E, unsigned long>(Queue);
check_bit_ops<class F, long long>(Queue);
check_bit_ops<class G, unsigned long long>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}
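
check_mul and check_bit_ops both funnel into check_op, whose definition lives in reduce.hpp outside this diff. As a rough illustration of the operation under test, here is a self-contained sketch of a sub-group reduction with the transparent multiplies functor; the kernel name, sizes, and host-side checking are illustrative, not the suite's actual harness:

#include <CL/sycl.hpp>
using namespace cl::sycl;

int main() {
  queue Queue;
  int Result = 0;
  {
    buffer<int, 1> Buf(&Result, range<1>(1));
    Queue.submit([&](handler &CGH) {
      auto Acc = Buf.get_access<access::mode::write>(CGH);
      CGH.parallel_for<class ReduceMulSketch>(
          nd_range<1>(range<1>(8), range<1>(8)), [=](nd_item<1> It) {
            ONEAPI::sub_group SG = It.get_sub_group();
            // Each of the 8 work-items contributes a 2, so the sub-group
            // product is 2^N for a sub-group spanning N work-items.
            int P = ONEAPI::reduce(SG, 2, ONEAPI::multiplies<>());
            if (It.get_global_id(0) == 0)
              Acc[0] = P;
          });
    });
  }
  // 256 only if the sub-group spans the whole 8-item work-group.
  return Result == 256 ? 0 : 1;
}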
21 changes: 21 additions & 0 deletions SYCL/SubGroup/reduce_spirv13_fp16.cpp
@@ -0,0 +1,21 @@
// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// This test verifies the correct work of the SPIR-V 1.3 reduce algorithm
// used with the MUL operation.

#include "reduce.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device()) ||
!Queue.get_device().has_extension("cl_khr_fp16")) {
std::cout << "Skipping test\n";
return 0;
}
check_mul<class MulHalf, cl::sycl::half>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}
28 changes: 28 additions & 0 deletions SYCL/SubGroup/reduce_spirv13_fp64.cpp
@@ -0,0 +1,28 @@
// UNSUPPORTED: cpu
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA

// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// This test verifies the correct work of the SPIR-V 1.3 reduce algorithm
// used with the MUL operation.

#include "reduce.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device()) ||
!Queue.get_device().has_extension("cl_khr_fp64")) {
std::cout << "Skipping test\n";
return 0;
}
check_mul<class MulDouble, double>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}
50 changes: 48 additions & 2 deletions SYCL/SubGroup/scan.hpp
@@ -115,7 +115,7 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<T>(), true, G, L);
}

-#if __cplusplus >= 201402L
+  // Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName, class KernelName_TPWS>, T>(
Queue, T(L), ONEAPI::plus<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_hWZv>, T>(
@@ -150,5 +150,51 @@ void check(queue &Queue, size_t G = 256, size_t L = 64) {
T>(Queue, std::numeric_limits<T>::min(), ONEAPI::maximum<>(), true, G,
L);
}
-#endif
}

template <typename SpecializationKernelName, typename T>
void check_mul(queue &Queue, size_t G = 256, size_t L = 4) {
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulF>, T>(
Queue, T(L), ONEAPI::multiplies<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulT>, T>(
Queue, T(1), ONEAPI::multiplies<T>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulFV>, T>(
Queue, T(L), ONEAPI::multiplies<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_MulTV>, T>(
Queue, T(1), ONEAPI::multiplies<>(), true, G, L);
}

template <typename SpecializationKernelName, typename T>
void check_bit_ops(queue &Queue, size_t G = 256, size_t L = 4) {
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORF>, T>(
Queue, T(L), ONEAPI::bit_or<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORT>, T>(
Queue, T(0), ONEAPI::bit_or<T>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORF>, T>(
Queue, T(L), ONEAPI::bit_xor<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORT>, T>(
Queue, T(0), ONEAPI::bit_xor<T>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDF>, T>(
Queue, T(L), ONEAPI::bit_and<T>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDT>, T>(
Queue, ~T(0), ONEAPI::bit_and<T>(), true, G, L);

// Transparent operator functors.
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORFV>, T>(
Queue, T(L), ONEAPI::bit_or<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ORTV>, T>(
Queue, T(0), ONEAPI::bit_or<>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORFV>, T>(
Queue, T(L), ONEAPI::bit_xor<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_XORTV>, T>(
Queue, T(0), ONEAPI::bit_xor<>(), true, G, L);

check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDFV>, T>(
Queue, T(L), ONEAPI::bit_and<>(), false, G, L);
check_op<sycl_subgr<SpecializationKernelName, class KernelName_ANDTV>, T>(
Queue, ~T(0), ONEAPI::bit_and<>(), true, G, L);
}
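
To make the exclusive/inclusive distinction concrete, here is a serial reference (plain C++, illustrative input only) of what the two scans produce with multiplies over a hypothetical four-item sub-group holding {1, 2, 3, 4}:

#include <cstdio>

int main() {
  int in[4] = {1, 2, 3, 4}, ex[4], inc[4];
  int acc = 1; // identity of multiplies, matching the T(1) init above
  for (int i = 0; i < 4; ++i) {
    ex[i] = acc;  // exclusive: result before combining element i
    acc *= in[i];
    inc[i] = acc; // inclusive: result after combining element i
  }
  // Prints: ex: 1 1 2 6  inc: 1 2 6 24
  std::printf("ex: %d %d %d %d  inc: %d %d %d %d\n", ex[0], ex[1], ex[2],
              ex[3], inc[0], inc[1], inc[2], inc[3]);
  return 0;
}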
12 changes: 4 additions & 8 deletions SYCL/SubGroup/scan_fp16.cpp
@@ -1,19 +1,15 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

-//==---------- scan_fp16.cpp - SYCL sub_group scan test --------*- C++ -*---==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
// This test verifies the correct work of the sub-group algorithms
// exclusive_scan() and inclusive_scan().

#include "scan.hpp"

int main() {
queue Queue;
-  if (!core_sg_supported(Queue.get_device())) {
+  if (!core_sg_supported(Queue.get_device()) ||
+      !Queue.get_device().has_extension("cl_khr_fp16")) {
std::cout << "Skipping test\n";
return 0;
}
12 changes: 4 additions & 8 deletions SYCL/SubGroup/scan_fp64.cpp
@@ -8,19 +8,15 @@
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

-//==---------- scan_fp64.cpp - SYCL sub_group scan test --------*- C++ -*---==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
// This test verifies the correct work of the sub-group algorithms
// exclusive_scan() and inclusive_scan().

#include "scan.hpp"

int main() {
queue Queue;
-  if (!core_sg_supported(Queue.get_device())) {
+  if (!core_sg_supported(Queue.get_device()) ||
+      !Queue.get_device().has_extension("cl_khr_fp64")) {
std::cout << "Skipping test\n";
return 0;
}
39 changes: 39 additions & 0 deletions SYCL/SubGroup/scan_spirv13.cpp
@@ -0,0 +1,39 @@
// UNSUPPORTED: cpu
// #2252 Disable until all variants of built-ins are available in OpenCL CPU
// runtime for every supported ISA

// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out
// RUN: %CPU_RUN_PLACEHOLDER %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out
// RUN: %ACC_RUN_PLACEHOLDER %t.out

// This test verifies the correct work of the SPIR-V 1.3 exclusive_scan() and
// inclusive_scan() algorithms used with the MUL, bitwise OR, XOR, AND operations.

#include "scan.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device())) {
std::cout << "Skipping test\n";
return 0;
}
check_mul<class MulA, int>(Queue);
check_mul<class MulB, unsigned int>(Queue);
check_mul<class MulC, long>(Queue);
check_mul<class MulD, unsigned long>(Queue);
check_mul<class MulE, float>(Queue);

check_bit_ops<class A, int>(Queue);
check_bit_ops<class B, unsigned int>(Queue);
check_bit_ops<class C, unsigned>(Queue);
check_bit_ops<class D, long>(Queue);
check_bit_ops<class E, unsigned long>(Queue);
check_bit_ops<class F, long long>(Queue);
check_bit_ops<class G, unsigned long long>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}
21 changes: 21 additions & 0 deletions SYCL/SubGroup/scan_spirv13_fp16.cpp
@@ -0,0 +1,21 @@
// UNSUPPORTED: cuda

// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %GPU_RUN_PLACEHOLDER %t.out

// This test verifies the correct work of the SPIR-V 1.3 exclusive_scan() and
// inclusive_scan() algorithms used with the MUL operation.

#include "scan.hpp"

int main() {
queue Queue;
if (!core_sg_supported(Queue.get_device()) ||
!Queue.get_device().has_extension("cl_khr_fp16")) {
std::cout << "Skipping test\n";
return 0;
}
check_mul<class MulHalf, cl::sycl::half>(Queue);
std::cout << "Test passed." << std::endl;
return 0;
}