Skip to content

Commit c3abb3d

Browse files
committed
[SYCL] Incorrect diagnostic for __spirv calls
When the reqd_sub_group_size attribute is applied on a sycl kernel, we check that SYCL_EXTERNAL functions called from such kernels also have the same sub_group_size. This need not be enforced on __spirv intrinsics since they are mapped tp the equivalent SPIR-V operations.
1 parent 7bc8447 commit c3abb3d

File tree

3 files changed

+76
-0
lines changed

3 files changed

+76
-0
lines changed

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3719,6 +3719,12 @@ static void CheckSYCL2020SubGroupSizes(Sema &S, FunctionDecl *SYCLKernel,
37193719
CalcEffectiveSubGroup(S.Context, S.getLangOpts(), FD))
37203720
return;
37213721

3722+
// No need to validate __spirv routines here since they
3723+
// are mapped to the equivalent SPIRV operations.
3724+
const IdentifierInfo *II = FD->getIdentifier();
3725+
if (II && II->getName().startswith("__spirv_"))
3726+
return;
3727+
37223728
// Else we need to figure out why they don't match.
37233729
SourceLocation FDAttrLoc = GetSubGroupLoc(FD);
37243730
SourceLocation KernelAttrLoc = GetSubGroupLoc(SYCLKernel);

clang/test/CodeGenSYCL/Inputs/sycl.hpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@
44

55
extern "C" int printf(const char* fmt, ...);
66

7+
#ifdef __SYCL_DEVICE_ONLY__
8+
__attribute__((convergent))
9+
extern SYCL_EXTERNAL void
10+
__spirv_ControlBarrier(int, int, int) noexcept;
11+
#endif
12+
713
// Dummy runtime classes to model SYCL API.
814
inline namespace cl {
915
namespace sycl {
@@ -61,6 +67,13 @@ enum class address_space : int {
6167
constant_space,
6268
local_space
6369
};
70+
71+
enum class fence_space {
72+
local_space = 0,
73+
global_space = 1,
74+
global_and_local = 2
75+
};
76+
6477
} // namespace access
6578

6679
// Dummy aspect enum with limited enumerators
@@ -164,6 +177,16 @@ template <int dim> struct item {
164177
int Data;
165178
};
166179

180+
template <int dimensions = 1> class nd_item {
181+
public:
182+
void barrier(access::fence_space accessSpace =
183+
access::fence_space::global_and_local) const {
184+
#ifdef __SYCL_DEVICE_ONLY__
185+
__spirv_ControlBarrier(0, 0, 0);
186+
#endif
187+
}
188+
};
189+
167190
namespace ext {
168191
namespace oneapi {
169192
template <typename... properties>
@@ -399,6 +422,12 @@ kernel_parallel_for(const KernelType &KernelFunc) {
399422
KernelFunc(id<Dims>());
400423
}
401424

425+
template <typename KernelName, typename KernelType, int Dims>
426+
ATTR_SYCL_KERNEL void
427+
kernel_parallel_for_item(const KernelType &KernelFunc) {
428+
KernelFunc(nd_item<Dims>());
429+
}
430+
402431
template <typename KernelName, typename KernelType, int Dims>
403432
ATTR_SYCL_KERNEL void
404433
kernel_parallel_for_work_group(const KernelType &KernelFunc) {
@@ -407,6 +436,16 @@ kernel_parallel_for_work_group(const KernelType &KernelFunc) {
407436

408437
class handler {
409438
public:
439+
template <typename KernelName = auto_name, typename KernelType, int Dims>
440+
void parallel_for(nd_range<Dims> numWorkItems, const KernelType &kernelFunc) {
441+
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;
442+
#ifdef __SYCL_DEVICE_ONLY__
443+
kernel_parallel_for_item<NameT, KernelType, Dims>(kernelFunc);
444+
#else
445+
kernelFunc(nd_item<Dims>());
446+
#endif
447+
}
448+
410449
template <typename KernelName = auto_name, typename KernelType, int Dims>
411450
void parallel_for(range<Dims> numWorkItems, const KernelType &kernelFunc) {
412451
using NameT = typename get_kernel_name_t<KernelName, KernelType>::name;
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: %clang_cc1 -triple spir64 -fsycl-is-device -std=c++17 -internal-isystem %S/Inputs -fdeclare-spirv-builtins %s -emit-llvm -o - | FileCheck %s
2+
3+
// Test that when __spirv intrinsics are invoked from kernel functions
4+
// that have a sub_group_size specified, that such invocations don't
5+
// trigger the error diagnostic that the intrinsic routines must also
6+
// marked with the same attribute.
7+
8+
#include "Inputs/sycl.hpp"
9+
10+
int main() {
11+
const int local_size = 8;
12+
const int global_size = 1*local_size;
13+
14+
cl::sycl::queue q;
15+
16+
q.submit([&] (cl::sycl::handler &cgh) {
17+
auto kernel_ = [=](cl::sycl::nd_item<1> item) [[intel::sub_group_size(8)]] {
18+
item.barrier(sycl::access::fence_space::local_space);
19+
};
20+
21+
cgh.parallel_for<class kernel_class>(cl::sycl::nd_range<1>(), kernel_);
22+
});
23+
return 0;
24+
}
25+
26+
// CHECK: define dso_local spir_kernel void @{{.*}}main{{.*}}kernel_class() {{.*}} !intel_reqd_sub_group_size ![[SUBGROUPSIZE:[0-9]+]]
27+
// CHECK: tail call spir_func void @{{.*}}__spirv_ControlBarrier{{.*}}({{.*}})
28+
29+
// CHECK: declare spir_func void @{{.*}}__spirv_ControlBarrier{{.*}}({{.*}})
30+
31+
// CHECK: ![[SUBGROUPSIZE]] = !{i32 8}

0 commit comments

Comments
 (0)