Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Update sub-group mask test #462

Merged
merged 7 commits into from
Sep 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ SYCL/Functor @AlexeySachkov
# Group algorithms
SYCL/GroupAlgorithm @Pennycook @AlexeySachkov
SYCL/SubGroup @Pennycook @AlexeySachkov
SYCL/GroupMask @Pennycook @vladimilaz
SYCL/SubGroupMask @Pennycook @vladimilaz

# Group local memory
SYCL/GroupLocalMemory @sergey-semenov @Pennycook
Expand Down
86 changes: 0 additions & 86 deletions SYCL/GroupMask/Basic.cpp

This file was deleted.

104 changes: 104 additions & 0 deletions SYCL/SubGroupMask/Basic.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// REQUIRES: gpu
// UNSUPPORTED: cuda, hip
// GroupNonUniformBallot capability is supported on Intel GPU only
// RUN: %GPU_RUN_PLACEHOLDER %t.out

//==---------- Basic.cpp - sub-group mask basic test -----------*- C++ -*---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>
using namespace sycl;
constexpr int global_size = 128;
constexpr int local_size = 32;
int main() {
#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK
queue Queue;

try {
nd_range<1> NdRange(global_size, local_size);
int Res = 0;
{
buffer resbuf(&Res, range<1>(1));

Queue.submit([&](handler &cgh) {
auto resacc = resbuf.get_access<access::mode::read_write>(cgh);

cgh.parallel_for<class sub_group_mask_test>(
NdRange, [=](nd_item<1> NdItem) [[intel::reqd_sub_group_size(32)]] {
size_t GID = NdItem.get_global_linear_id();
auto SG = NdItem.get_sub_group();
// AAAAAAAA
auto gmask_gid2 =
ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 2);
// B6DB6DB6
auto gmask_gid3 =
ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 3);

if (!GID) {
int res = 0;

for (size_t i = 0; i < SG.get_max_local_range()[0]; i++) {
res |= !((gmask_gid2 | gmask_gid3)[i] == (i % 2 || i % 3))
<< 1;
res |= !((gmask_gid2 & gmask_gid3)[i] == (i % 2 && i % 3))
<< 2;
res |= !((gmask_gid2 ^ gmask_gid3)[i] ==
((bool)(i % 2) ^ (bool)(i % 3)))
<< 3;
}
gmask_gid2 <<= 8;
uint32_t r = 0;
gmask_gid2.extract_bits(r);
res |= (r != 0xaaaaaa00) << 4;
(gmask_gid2 >> 4).extract_bits(r);
res |= (r != 0x0aaaaaa0) << 5;
gmask_gid3.insert_bits((char)0b01010101, 8);
res |= (!gmask_gid3[8] || gmask_gid3[9] || !gmask_gid3[10] ||
gmask_gid3[11])
<< 6;
marray<unsigned char, 6> mr{1};

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry if this is obvious, but why is this a 6?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just longer-than-sub_group_mask-can accept.

gmask_gid3.extract_bits(mr);
res |= (mr[0] != 0xb6 || mr[1] != 0x55 || mr[2] != 0xdb ||
mr[3] != 0xb6 || mr[4] || mr[5])
<< 7;
res |= (gmask_gid2[30] || !gmask_gid2[31]) << 8;
gmask_gid3[0] = gmask_gid3[3] = gmask_gid3[6] = true;
gmask_gid3.extract_bits(r);
res |= (r != 0xb6db55ff) << 9;
gmask_gid3.reset();
res |= !(gmask_gid3.none() && gmask_gid2.any() &&
!gmask_gid2.all())
<< 10;
gmask_gid2.set();
res |=
!(gmask_gid3.none() && gmask_gid2.any() && gmask_gid2.all())
<< 11;
gmask_gid3.flip();
res |= (gmask_gid3 != gmask_gid2) << 12;
resacc[0] = res;
}
});
});
}
if (Res) {
std::cout << "Unexpected result for sub_group_mask operation: " << Res
<< std::endl;
exit(1);
}
} catch (exception e) {
std::cout << "SYCL exception caught: " << e.what();
exit(1);
}

std::cout << "Test passed." << std::endl;
#else
std::cout << "Test skipped due to missing extension." << std::endl;
#endif
return 0;
}
92 changes: 92 additions & 0 deletions SYCL/SubGroupMask/GroupSize.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// REQUIRES: gpu
// UNSUPPORTED: cuda, hip
// GroupNonUniformBallot capability is supported on Intel GPU only
// RUN: %GPU_RUN_PLACEHOLDER %t.out

//==- GroupSize.cpp - sub-group mask dependency on group size --*- C++ -*---==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>
using namespace sycl;
#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK
constexpr int global_size = 128;
constexpr int local_size = 32;
template <size_t> class sycl_subgr;

template <size_t SGSize> void test(queue Queue) {
std::cout << "Testing sub_group_mask for sub-group size=" << SGSize
<< std::endl;
try {
nd_range<1> NdRange(global_size, local_size);
int Res[32 / SGSize] = {0};
{
buffer resbuf(Res, range<1>(32 / SGSize));

Queue.submit([&](handler &cgh) {
auto resacc = resbuf.template get_access<access::mode::read_write>(cgh);

cgh.parallel_for<sycl_subgr<SGSize>>(
NdRange, [=
](nd_item<1> NdItem) [[intel::reqd_sub_group_size(SGSize)]] {
auto SG = NdItem.get_sub_group();
auto LID = SG.get_local_id();
auto SGID = SG.get_group_id();

auto gmask_gid2 =
ext::oneapi::group_ballot(NdItem.get_sub_group(), LID % 2);
auto gmask_gid3 =
ext::oneapi::group_ballot(NdItem.get_sub_group(), LID % 3);

if (!LID) {
int res = 0;

for (size_t i = 0; i < SG.get_max_local_range()[0]; i++) {
res |= !((gmask_gid2 | gmask_gid3)[i] == (i % 2 || i % 3))
<< 1;
res |= !((gmask_gid2 & gmask_gid3)[i] == (i % 2 && i % 3))
<< 2;
res |= !((gmask_gid2 ^ gmask_gid3)[i] ==
((bool)(i % 2) ^ (bool)(i % 3)))
<< 3;
}
res |= (gmask_gid2.size() != SG.get_max_local_range()[0]) << 4;
resacc[SGID] = res;
}
});
});
}
for (size_t i = 0; i < 32 / SGSize; i++) {
if (Res[i]) {
std::cout
<< "Unexpected result for sub_group_mask operation for sub-group "
<< i << ": " << Res[i] << std::endl;
exit(1);
}
}
} catch (exception e) {
std::cout << "SYCL exception caught: " << e.what();
exit(1);
}
}
#endif // SYCL_EXT_ONEAPI_SUB_GROUP_MASK

int main() {
#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK
queue Queue;

test<8>(Queue);
test<16>(Queue);
test<32>(Queue);

std::cout << "Test passed." << std::endl;
#else
std::cout << "Test skipped due to missing extension." << std::endl;
#endif
return 0;
}