diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 28a403e8bd..4c1f6f8d0b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -35,7 +35,7 @@ SYCL/Functor @AlexeySachkov # Group algorithms SYCL/GroupAlgorithm @Pennycook @AlexeySachkov SYCL/SubGroup @Pennycook @AlexeySachkov -SYCL/GroupMask @Pennycook @vladimilaz +SYCL/SubGroupMask @Pennycook @vladimilaz # Group local memory SYCL/GroupLocalMemory @sergey-semenov @Pennycook diff --git a/SYCL/GroupMask/Basic.cpp b/SYCL/GroupMask/Basic.cpp deleted file mode 100644 index 6fe6956e75..0000000000 --- a/SYCL/GroupMask/Basic.cpp +++ /dev/null @@ -1,86 +0,0 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// REQUIRES: gpu -// UNSUPPORTED: cuda, hip -// GroupNonUniformBallot capability is supported on Intel GPU only -// RUN: %GPU_RUN_PLACEHOLDER %t.out - -//==---------- Basic.cpp - SYCL Group Mask basic test ----------*- C++ -*---==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -using namespace sycl; -constexpr int global_size = 128; -constexpr int local_size = 32; -int main() { -#ifdef SYCL_EXT_ONEAPI_GROUP_MASK - queue Queue; - - try { - nd_range<1> NdRange(global_size, local_size); - int Res = 0; - { - buffer resbuf(&Res, range<1>(1)); - - Queue.submit([&](handler &cgh) { - auto resacc = resbuf.get_access(cgh); - - cgh.parallel_for(NdRange, [=](nd_item<1> NdItem) { - size_t GID = NdItem.get_global_linear_id(); - auto SG = NdItem.get_sub_group(); - auto gmask_gid2 = - ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 2); - auto gmask_gid3 = - ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 3); - NdItem.barrier(); - - if (!GID) { - int res = 0; - - for (size_t i = 0; i < SG.get_max_local_range()[0]; i++) { - res |= !((gmask_gid2 | gmask_gid3)[i] == (i % 2 || i % 3)) << 1; - res |= !((gmask_gid2 & gmask_gid3)[i] == (i % 2 && i % 3)) << 2; - res |= !((gmask_gid2 ^ gmask_gid3)[i] == - ((bool)(i % 2) ^ (bool)(i % 3))) - << 3; - } - gmask_gid2 <<= 32; - res |= (gmask_gid2.extract_bits()[2] != 0xaaaaaaaa) << 4; - res |= ((gmask_gid2 >> 8).extract_bits()[3] != 0xaa000000) << 5; - res |= ((gmask_gid3 >> 8).extract_bits()[3] != 0xb6db6d) << 6; - res |= (!gmask_gid2[32] && gmask_gid2[31]) << 7; - gmask_gid3[0] = gmask_gid3[3] = gmask_gid3[6] = true; - res |= (gmask_gid3.extract_bits()[3] != 0xb6db6dff) << 7; - gmask_gid3.reset(); - res |= !(gmask_gid3.none() && gmask_gid2.any() && !gmask_gid2.all()) - << 8; - gmask_gid2.set(); - res |= !(gmask_gid3.none() && gmask_gid2.any() && gmask_gid2.all()) - << 9; - gmask_gid3.flip(); - res |= (gmask_gid3 != gmask_gid2) << 10; - resacc[0] = res; - } - }); - }); - } - if (Res) { - std::cout << "Unexpected result for group_mask operation: " << Res - << std::endl; - exit(1); - } - } catch (exception e) { - std::cout << "SYCL exception caught: " << e.what(); - exit(1); - } - - std::cout << "Test passed." << std::endl; -#else - std::cout << "Test skipped due to missing extension." << std::endl; -#endif - return 0; -} diff --git a/SYCL/SubGroupMask/Basic.cpp b/SYCL/SubGroupMask/Basic.cpp new file mode 100644 index 0000000000..cffdd7ee25 --- /dev/null +++ b/SYCL/SubGroupMask/Basic.cpp @@ -0,0 +1,104 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// REQUIRES: gpu +// UNSUPPORTED: cuda, hip +// GroupNonUniformBallot capability is supported on Intel GPU only +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +//==---------- Basic.cpp - sub-group mask basic test -----------*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +using namespace sycl; +constexpr int global_size = 128; +constexpr int local_size = 32; +int main() { +#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK + queue Queue; + + try { + nd_range<1> NdRange(global_size, local_size); + int Res = 0; + { + buffer resbuf(&Res, range<1>(1)); + + Queue.submit([&](handler &cgh) { + auto resacc = resbuf.get_access(cgh); + + cgh.parallel_for( + NdRange, [=](nd_item<1> NdItem) [[intel::reqd_sub_group_size(32)]] { + size_t GID = NdItem.get_global_linear_id(); + auto SG = NdItem.get_sub_group(); + // AAAAAAAA + auto gmask_gid2 = + ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 2); + // B6DB6DB6 + auto gmask_gid3 = + ext::oneapi::group_ballot(NdItem.get_sub_group(), GID % 3); + + if (!GID) { + int res = 0; + + for (size_t i = 0; i < SG.get_max_local_range()[0]; i++) { + res |= !((gmask_gid2 | gmask_gid3)[i] == (i % 2 || i % 3)) + << 1; + res |= !((gmask_gid2 & gmask_gid3)[i] == (i % 2 && i % 3)) + << 2; + res |= !((gmask_gid2 ^ gmask_gid3)[i] == + ((bool)(i % 2) ^ (bool)(i % 3))) + << 3; + } + gmask_gid2 <<= 8; + uint32_t r = 0; + gmask_gid2.extract_bits(r); + res |= (r != 0xaaaaaa00) << 4; + (gmask_gid2 >> 4).extract_bits(r); + res |= (r != 0x0aaaaaa0) << 5; + gmask_gid3.insert_bits((char)0b01010101, 8); + res |= (!gmask_gid3[8] || gmask_gid3[9] || !gmask_gid3[10] || + gmask_gid3[11]) + << 6; + marray mr{1}; + gmask_gid3.extract_bits(mr); + res |= (mr[0] != 0xb6 || mr[1] != 0x55 || mr[2] != 0xdb || + mr[3] != 0xb6 || mr[4] || mr[5]) + << 7; + res |= (gmask_gid2[30] || !gmask_gid2[31]) << 8; + gmask_gid3[0] = gmask_gid3[3] = gmask_gid3[6] = true; + gmask_gid3.extract_bits(r); + res |= (r != 0xb6db55ff) << 9; + gmask_gid3.reset(); + res |= !(gmask_gid3.none() && gmask_gid2.any() && + !gmask_gid2.all()) + << 10; + gmask_gid2.set(); + res |= + !(gmask_gid3.none() && gmask_gid2.any() && gmask_gid2.all()) + << 11; + gmask_gid3.flip(); + res |= (gmask_gid3 != gmask_gid2) << 12; + resacc[0] = res; + } + }); + }); + } + if (Res) { + std::cout << "Unexpected result for sub_group_mask operation: " << Res + << std::endl; + exit(1); + } + } catch (exception e) { + std::cout << "SYCL exception caught: " << e.what(); + exit(1); + } + + std::cout << "Test passed." << std::endl; +#else + std::cout << "Test skipped due to missing extension." << std::endl; +#endif + return 0; +} diff --git a/SYCL/SubGroupMask/GroupSize.cpp b/SYCL/SubGroupMask/GroupSize.cpp new file mode 100644 index 0000000000..3f70fe58cb --- /dev/null +++ b/SYCL/SubGroupMask/GroupSize.cpp @@ -0,0 +1,92 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// REQUIRES: gpu +// UNSUPPORTED: cuda, hip +// GroupNonUniformBallot capability is supported on Intel GPU only +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +//==- GroupSize.cpp - sub-group mask dependency on group size --*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +using namespace sycl; +#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK +constexpr int global_size = 128; +constexpr int local_size = 32; +template class sycl_subgr; + +template void test(queue Queue) { + std::cout << "Testing sub_group_mask for sub-group size=" << SGSize + << std::endl; + try { + nd_range<1> NdRange(global_size, local_size); + int Res[32 / SGSize] = {0}; + { + buffer resbuf(Res, range<1>(32 / SGSize)); + + Queue.submit([&](handler &cgh) { + auto resacc = resbuf.template get_access(cgh); + + cgh.parallel_for>( + NdRange, [= + ](nd_item<1> NdItem) [[intel::reqd_sub_group_size(SGSize)]] { + auto SG = NdItem.get_sub_group(); + auto LID = SG.get_local_id(); + auto SGID = SG.get_group_id(); + + auto gmask_gid2 = + ext::oneapi::group_ballot(NdItem.get_sub_group(), LID % 2); + auto gmask_gid3 = + ext::oneapi::group_ballot(NdItem.get_sub_group(), LID % 3); + + if (!LID) { + int res = 0; + + for (size_t i = 0; i < SG.get_max_local_range()[0]; i++) { + res |= !((gmask_gid2 | gmask_gid3)[i] == (i % 2 || i % 3)) + << 1; + res |= !((gmask_gid2 & gmask_gid3)[i] == (i % 2 && i % 3)) + << 2; + res |= !((gmask_gid2 ^ gmask_gid3)[i] == + ((bool)(i % 2) ^ (bool)(i % 3))) + << 3; + } + res |= (gmask_gid2.size() != SG.get_max_local_range()[0]) << 4; + resacc[SGID] = res; + } + }); + }); + } + for (size_t i = 0; i < 32 / SGSize; i++) { + if (Res[i]) { + std::cout + << "Unexpected result for sub_group_mask operation for sub-group " + << i << ": " << Res[i] << std::endl; + exit(1); + } + } + } catch (exception e) { + std::cout << "SYCL exception caught: " << e.what(); + exit(1); + } +} +#endif // SYCL_EXT_ONEAPI_SUB_GROUP_MASK + +int main() { +#ifdef SYCL_EXT_ONEAPI_SUB_GROUP_MASK + queue Queue; + + test<8>(Queue); + test<16>(Queue); + test<32>(Queue); + + std::cout << "Test passed." << std::endl; +#else + std::cout << "Test skipped due to missing extension." << std::endl; +#endif + return 0; +}