diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index cb91cc1707b23..3625412699216 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2581,32 +2581,39 @@ def SYCLIntelUseStallEnableClustersAttrDocs : Documentation { let Category = DocCatFunction; let Heading = "intel::use_stall_enable_clusters"; let Content = [{ -When applied to a lambda or function call operator (of a function object) -on device, this requests, to the extent possible, that statically-scheduled -clusters handle stalls using a stall-enable signal to freeze computation -within the cluster. This attribute is ignored on the host. +The ``intel::use_stall_enable_clusters`` attribute requires SYCL. +When applied to a lambda function, function definition, or function call +operator (of a function object) on device, this requests, to the +extent possible, that statically-scheduled clusters handle stalls using a +stall-enable signal to freeze computation within the cluster. This attribute +is ignored on the host. -If ``intel::use_stall_enable_clusters`` is applied to a function called from a device -kernel, the attribute is ignored and it is not propagated to the kernel. +The ``intel::use_stall_enable_clusters`` attribute takes no argument and has an +effect when applied to a function, and no effect otherwise. -The ``intel::use_stall_enable_clusters`` attribute takes no argument and has an effect -when applied to a function, and no effect otherwise. +.. code-block:: c++ + + class Foo { + public: + [[intel::use_stall_enable_clusters]] void operator()() const {} + }; + + [[intel::use_stall_enable_clusters]] void test() {} + + struct FuncObj { + [[intel::use_stall_enable_clusters]] void operator()() const {} + }; + +The ``intel::use_stall_enable_clusters`` attribute supports a nonconforming +behavior when applied to a lambda in the type position. .. 
code-block:: c++ - class Functor - { - [[intel::use_stall_enable_clusters]] void operator()(item<1> item) - { - /* kernel code */ - } + void test1() { + auto lambda = []() [[intel::use_stall_enable_clusters]]{}; + lambda(); } - kernel( - []() [[intel::use_stall_enable_clusters]] { - /* kernel code */ - }); - }]; } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index db9cdbddf3107..76a647d5de02b 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -761,12 +761,6 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, Fn->setMetadata("no_global_work_offset", llvm::MDNode::get(Context, {})); } - if (FD->hasAttr()) { - llvm::Metadata *AttrMDArgs[] = { - llvm::ConstantAsMetadata::get(Builder.getInt32(1))}; - Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs)); - } - if (const auto *A = FD->getAttr()) { const auto *CE = cast(A->getNThreadsExpr()); llvm::APSInt ArgVal = CE->getResultAsAPSInt(); @@ -1064,6 +1058,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, } } + if (getLangOpts().SYCLIsDevice && D && + D->hasAttr()) { + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Builder.getInt32(1))}; + Fn->setMetadata("stall_enable", + llvm::MDNode::get(getLLVMContext(), AttrMDArgs)); + } + if (getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) { // Add metadata for a kernel function. if (const FunctionDecl *FD = dyn_cast_or_null(D)) { diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 65c7465e7bdc2..4415c0af00b23 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -568,20 +568,6 @@ static void collectSYCLAttributes(Sema &S, FunctionDecl *FD, SYCLIntelNoGlobalWorkOffsetAttr, SYCLSimdAttr>(A); }); - // Allow the kernel attribute "use_stall_enable_clusters" only on lambda - // functions and function objects called directly from a kernel. 
- // For all other cases, emit a warning and ignore. - if (auto *A = FD->getAttr()) { - if (DirectlyCalled) { - Attrs.push_back(A); - } else { - S.Diag(A->getLocation(), - diag::warn_attribute_on_direct_kernel_callee_only) - << A; - FD->dropAttr(); - } - } - // Attributes that should not be propagated from device functions to a kernel. if (DirectlyCalled) { llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) { @@ -4128,7 +4114,6 @@ static void PropagateAndDiagnoseDeviceAttr( case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz: case attr::Kind::SYCLIntelMaxGlobalWorkDim: case attr::Kind::SYCLIntelNoGlobalWorkOffset: - case attr::Kind::SYCLIntelUseStallEnableClusters: case attr::Kind::SYCLIntelLoopFuse: case attr::Kind::SYCLIntelFPGAMaxConcurrency: case attr::Kind::SYCLIntelFPGADisableLoopPipelining: diff --git a/clang/test/CodeGenSYCL/stall_enable.cpp b/clang/test/CodeGenSYCL/stall_enable.cpp deleted file mode 100644 index 0c9a0ba7653b6..0000000000000 --- a/clang/test/CodeGenSYCL/stall_enable.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s - -#include "sycl.hpp" - -using namespace cl::sycl; -queue q; - -class Foo { -public: - [[intel::use_stall_enable_clusters]] void operator()() const {} -}; - -int main() { - q.submit([&](handler &h) { - Foo f; - h.single_task(f); - - h.single_task( - []() [[intel::use_stall_enable_clusters]]{}); - }); - return 0; -} - -// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() #0 {{.*}}!stall_enable ![[NUM5:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() #0 {{.*}}!stall_enable ![[NUM5]] -// CHECK: ![[NUM5]] = !{i32 1} diff --git a/clang/test/CodeGenSYCL/stall_enable_device.cpp b/clang/test/CodeGenSYCL/stall_enable_device.cpp new file mode 100644 index 0000000000000..a4dc7cd360a46 --- /dev/null +++ 
b/clang/test/CodeGenSYCL/stall_enable_device.cpp @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Device. +// The metadata is attached to the FunctionDecl that the attribute is applied to. +// The attributes do not get propagated to kernel metadata i.e. spir_kernel. + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +[[intel::use_stall_enable_clusters]] void test() {} + +struct FuncObj { + [[intel::use_stall_enable_clusters]] void operator()() const {} +}; + +void test1() { + auto lambda = []() [[intel::use_stall_enable_clusters]]{}; + lambda(); +} + +class Foo { +public: + [[intel::use_stall_enable_clusters]] void operator()() const {} +}; + +int main() { + q.submit([&](handler &h) { + // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel1() #0 !kernel_arg_buffer_location ![[NUM4:[0-9]+]] + // CHECK: define {{.*}}spir_func void @{{.*}}FuncObjclEv(%struct.{{.*}}FuncObj addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5:[0-9]+]] + h.single_task( + FuncObj()); + + // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel2() #0 !kernel_arg_buffer_location ![[NUM4]] + // CHECK: define {{.*}}spir_func void @{{.*}}FooclEv(%class.{{.*}}Foo addrspace(4)* align 1 dereferenceable_or_null(1) %this) #3 comdat align 2 !stall_enable ![[NUM5]] + Foo f; + h.single_task(f); + + // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel3() #0 !kernel_arg_buffer_location ![[NUM4]] + // CHECK: define {{.*}}spir_func void @_Z4testv() #3 !stall_enable ![[NUM5]] + h.single_task( + []() { test(); }); + + // CHECK: define {{.*}}spir_kernel void @{{.*}}test_kernel4() #0 !kernel_arg_buffer_location ![[NUM4]] + // CHECK: define {{.*}}spir_func void @{{.*}}test1vENKUlvE_clEv(%class.{{.*}}test1{{.*}}.anon 
addrspace(4)* align 1 dereferenceable_or_null(1) %this) #4 align 2 !stall_enable ![[NUM5]] + h.single_task( + []() { test1(); }); + }); + return 0; +} + +// CHECK: ![[NUM4]] = !{} +// CHECK: ![[NUM5]] = !{i32 1} diff --git a/clang/test/CodeGenSYCL/stall_enable_host.cpp b/clang/test/CodeGenSYCL/stall_enable_host.cpp new file mode 100644 index 0000000000000..4f99f0dfa2bae --- /dev/null +++ b/clang/test/CodeGenSYCL/stall_enable_host.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -fsycl-is-host -triple x86_64-unknown-linux-gnu -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +// Tests for IR of Intel FPGA [[intel::use_stall_enable_clusters]] function attribute on Host (no-op in IR-CodeGen for host-mode). + +[[intel::use_stall_enable_clusters]] void test() {} + +void test1() { + auto lambda = []() [[intel::use_stall_enable_clusters]]{}; + lambda(); +} + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +class KernelFunctor { +public: + [[intel::use_stall_enable_clusters]] void operator()() const {} + +}; + +void foo() { + + KernelFunctor f; + kernel(f); +} + +// CHECK-NOT: !stall_enable diff --git a/clang/test/SemaSYCL/stall_enable.cpp b/clang/test/SemaSYCL/stall_enable.cpp deleted file mode 100644 index 6cea6a2379c31..0000000000000 --- a/clang/test/SemaSYCL/stall_enable.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// RUN: %clang_cc1 %s -fsyntax-only -internal-isystem %S/Inputs -fsycl-is-device -Wno-sycl-2017-compat -DTRIGGER_ERROR -verify -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsyntax-only -ast-dump -Wno-sycl-2017-compat %s | FileCheck %s - -#include "sycl.hpp" - -using namespace cl::sycl; -queue q; - -[[intel::use_stall_enable_clusters]] void test() {} // expected-warning{{'use_stall_enable_clusters' attribute allowed only on a function directly called from a SYCL kernel}} - -#ifdef TRIGGER_ERROR -[[intel::use_stall_enable_clusters(1)]] void bar1() {} // expected-error{{'use_stall_enable_clusters' 
attribute takes no arguments}} -[[intel::use_stall_enable_clusters]] int N; // expected-error{{'use_stall_enable_clusters' attribute only applies to functions}} -#endif - -struct FuncObj { - [[intel::use_stall_enable_clusters]] void operator()() const {} -}; - -int main() { - q.submit([&](handler &h) { - // CHECK-LABEL: FunctionDecl {{.*}}test_kernel1 - // CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}} - h.single_task( - FuncObj()); - - // CHECK-LABEL: FunctionDecl {{.*}}test_kernel2 - // CHECK: SYCLIntelUseStallEnableClustersAttr {{.*}} - h.single_task( - []() [[intel::use_stall_enable_clusters]]{}); - - // CHECK-LABEL: FunctionDecl {{.*}}test_kernel3 - // CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}} - h.single_task( - []() { test(); }); - }); - return 0; -} diff --git a/clang/test/SemaSYCL/stall_enable_device.cpp b/clang/test/SemaSYCL/stall_enable_device.cpp new file mode 100644 index 0000000000000..7df8e0d8a87c0 --- /dev/null +++ b/clang/test/SemaSYCL/stall_enable_device.cpp @@ -0,0 +1,60 @@ +// RUN: %clang_cc1 %s -fsyntax-only -internal-isystem %S/Inputs -fsycl-is-device -Wno-sycl-2017-compat -DTRIGGER_ERROR -verify +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsyntax-only -ast-dump -Wno-sycl-2017-compat %s | FileCheck %s + +// Test that checks [[intel::use_stall_enable_clusters]] attribute support on function. + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +// Test attribute is present on function definition. +[[intel::use_stall_enable_clusters]] void test() {} +// CHECK: FunctionDecl{{.*}}test +// CHECK: SYCLIntelUseStallEnableClustersAttr + +// Tests for incorrect argument values for Intel FPGA use_stall_enable_clusters function attribute. 
+#ifdef TRIGGER_ERROR +[[intel::use_stall_enable_clusters(1)]] void test1() {} // expected-error{{'use_stall_enable_clusters' attribute takes no arguments}} +[[intel::use_stall_enable_clusters]] int test2; // expected-error{{'use_stall_enable_clusters' attribute only applies to functions}} +#endif + +// Test attribute is present on function call operator (of a function object). +struct FuncObj { + [[intel::use_stall_enable_clusters]] void operator()() const {} + // CHECK: CXXRecordDecl{{.*}}implicit struct FuncObj + // CHECK-NEXT: CXXMethodDecl{{.*}}used operator() 'void () const' + // CHECK: SYCLIntelUseStallEnableClustersAttr +}; + +// Test attribute is present on lambda function (applied to a function type for the lambda's call operator). +void test3() { + auto lambda = []() [[intel::use_stall_enable_clusters]]{}; + lambda(); + // CHECK: FunctionDecl{{.*}}test3 + // CHECK: LambdaExpr + // CHECK: SYCLIntelUseStallEnableClustersAttr +} + +int main() { + q.submit([&](handler &h) { + // Test attribute is not propagated to the kernel. + // CHECK-LABEL: FunctionDecl {{.*}}test_kernel1 + // CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}} + h.single_task( + FuncObj()); + + // Test attribute is not present on LambdaExpr called by kernel. + // CHECK-LABEL: FunctionDecl {{.*}}test_kernel2 + // CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}} + h.single_task( + []() [[intel::use_stall_enable_clusters]]{}); + + // Test attribute is not propagated to the kernel. + // CHECK-LABEL: FunctionDecl {{.*}}test_kernel3 + // CHECK-NOT: SYCLIntelUseStallEnableClustersAttr {{.*}} + h.single_task( + []() { test(); }); + }); + return 0; +}