// Patch section 1 of 2: declares the [[intel::stall_enable]] attribute, documents it,
// registers its parsed kind, and emits kernel metadata for it.
//
// Attr.td: adds `SYCLIntelStallEnable`, an InheritableAttr with the C++11 spelling
//   intel::stall_enable, LangOpts SYCLIsHost/SYCLIsDevice, Subjects limited to Function
//   (ErrorDiag), a static getName() returning "stall_enable", and
//   PragmaAttributeSupport = 0.
// AttrDocs.td: adds `SYCLIntelStallEnableAttrDocs` (category DocCatFunction).
// AttributeCommonInfo.h: appends `AT_SYCLIntelStallEnable` to the `||` chain whose match
//   makes the visible code `return true`; the enclosing function lies outside this hunk.
// CodeGenFunction.cpp: in the hunk headed CodeGenFunction::EmitOpenCLKernelMetadata, when
//   FD carries the attribute, sets function metadata "stall_enable" to an MDNode holding
//   the constant i32 1.
//
// NOTE(review): the line breaks and indentation of this diff have been collapsed; restore
//   the original patch text before trying to apply it.
// NOTE(review): `FD->hasAttr()` has no template argument. Presumably
//   `<SYCLIntelStallEnableAttr>` was stripped together with other tag-like `<...>` text;
//   confirm against the original patch.
// NOTE(review): in the documentation example, `class Functor { ... }` has no trailing `;`
//   and `kernel(` has no kernel-name template argument (possibly stripped as well); verify.
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 2e3b89bb33dad..93ff8013a7e2b 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1226,6 +1226,19 @@ def SYCLIntelNumSimdWorkItems : InheritableAttr { let PragmaAttributeSupport = 0; } +def SYCLIntelStallEnable : InheritableAttr { + let Spellings = [CXX11<"intel","stall_enable">]; + let LangOpts = [SYCLIsHost, SYCLIsDevice]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let AdditionalMembers = [{ + static const char *getName() { + return "stall_enable"; + } + }]; + let Documentation = [SYCLIntelStallEnableAttrDocs]; + let PragmaAttributeSupport = 0; +} + def SYCLIntelSchedulerTargetFmaxMhz : InheritableAttr { let Spellings = [CXX11<"intelfpga","scheduler_target_fmax_mhz">, CXX11<"intel","scheduler_target_fmax_mhz">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 530dae28eb03a..17868fb300a83 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2212,6 +2212,39 @@ device kernel, the attribute is ignored and it is not propagated to a kernel. }]; } +def SYCLIntelStallEnableAttrDocs : Documentation { + let Category = DocCatFunction; + let Heading = "intel::stall_enable"; + let Content = [{ +When applied to a lambda or function call operator (of a function object) +on device, this requests, to the extent possible, that statically-scheduled +clusters handle stalls using a stall-enable signal to freeze computation +within the cluster. This attribute is ignored on the host. + +If ``intel::stall_enable`` is applied to a function called from a device +kernel, the attribute is ignored and it is not propagated to a kernel. + +The ``intel::stall_enable`` attribute takes no argument and has an effect +when applied to a function, and no effect otherwise. + +.. 
code-block:: c++ + + class Functor + { + [[intel::stall_enable]] void operator()(item<1> item) + { + /* kernel code */ + } + } + + kernel( + []() [[intel::stall_enable]] { + /* kernel code */ + }); + + }]; +} + def ReqdWorkGroupSizeAttrDocs : Documentation { let Category = DocCatFunction; let Heading = "reqd_work_group_size"; diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index 1884813f2524a..57c1e50dad242 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -165,7 +165,8 @@ class AttributeCommonInfo { ParsedAttr == AT_SYCLIntelSchedulerTargetFmaxMhz || ParsedAttr == AT_SYCLIntelMaxWorkGroupSize || ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim || - ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset) + ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset || + ParsedAttr == AT_SYCLIntelStallEnable) return true; return false; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index d8836270a647b..a0891d26137db 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -676,6 +676,12 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, if (A->getEnabled()) Fn->setMetadata("no_global_work_offset", llvm::MDNode::get(Context, {})); } + + if (FD->hasAttr()) { + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Builder.getInt32(1))}; + Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs)); + } } /// Determine whether the function F ends with a return stmt. 
// Patch section 2 of 2: semantic handling of [[intel::stall_enable]] plus its lit tests.
//
// SemaDeclAttr.cpp: adds static handleStallEnableAttr(). It returns early for an invalid
//   decl, emits warn_attribute_too_many_arguments and returns when the parsed attribute
//   has any arguments, and otherwise forwards to handleSimpleAttribute. ProcessDeclAttribute
//   gains a `case ParsedAttr::AT_SYCLIntelStallEnable` that calls it.
// SemaSYCL.cpp: inside class MarkDeviceFunction, the attribute found on FD is inserted
//   into Attrs only when `ParentFD == SYCLKernel`; otherwise warn_attribute_ignored is
//   emitted at the attribute's location and the attribute is dropped from FD.
//   Sema::MarkDevice gains `case attr::Kind::SYCLIntelStallEnable` next to the other kinds
//   in that switch.
// clang/test/CodeGenSYCL/stall_enable.cpp (new): FileCheck expects both spir_kernel
//   definitions to carry `!stall_enable` pointing at a node equal to `!{i32 1}`.
// clang/test/SemaSYCL/stall_enable.cpp (new): expects the "attribute ignored" warning on
//   the free function test(). Under TRIGGER_ERROR it expects errors for an argument and for
//   a non-function subject. The AST-dump CHECK lines expect SYCLIntelStallEnableAttr under
//   test_kernel1 and test_kernel2 and none under test_kernel3.
//
// NOTE(review): the Sema test expects an *error* "'stall_enable' attribute takes no
//   arguments", whereas handleStallEnableAttr itself emits warn_attribute_too_many_arguments.
//   Presumably the error comes from a check that runs before this handler, which would make
//   the NumArgs branch redundant. Confirm, then drop the branch or align the diagnostic.
// NOTE(review): tag-like `<...>` text appears to have been stripped from this copy:
//   `handleSimpleAttribute(S, D, Attr)`, `FD->getAttr()`, `FD->dropAttr()`,
//   `KernelBody->hasAttr()`, `RecursiveASTVisitor`, and the `h.single_task(...)` calls,
//   while the CHECK lines reference test_kernel1..3. Restore them from the original patch.
// NOTE(review): the line breaks and indentation of this diff have been collapsed; the C++
//   line comments below would swallow the rest of each physical line if compiled as-is.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1bf0749e00aa9..72384a1e2cc18 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3029,6 +3029,20 @@ static void handleNumSimdWorkItemsAttr(Sema &S, Decl *D, E); } +// Handles stall_enable +static void handleStallEnableAttr(Sema &S, Decl *D, const ParsedAttr &Attr) { + if (D->isInvalidDecl()) + return; + + unsigned NumArgs = Attr.getNumArgs(); + if (NumArgs > 0) { + S.Diag(Attr.getLoc(), diag::warn_attribute_too_many_arguments) << Attr << 0; + return; + } + + handleSimpleAttribute(S, D, Attr); +} + // Add scheduler_target_fmax_mhz void Sema::addSYCLIntelSchedulerTargetFmaxMhzAttr( Decl *D, const AttributeCommonInfo &Attr, Expr *E) { @@ -8397,6 +8411,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_SYCLIntelNoGlobalWorkOffset: handleNoGlobalWorkOffsetAttr(S, D, AL); break; + case ParsedAttr::AT_SYCLIntelStallEnable: + handleStallEnableAttr(S, D, AL); + break; case ParsedAttr::AT_VecTypeHint: handleVecTypeHint(S, D, AL); break; diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 8ee923491bff0..57ae37e1f96e2 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -545,6 +545,20 @@ class MarkDeviceFunction : public RecursiveASTVisitor { if (auto *A = FD->getAttr()) Attrs.insert(A); + + // Allow the kernel attribute "stall_enable" only on lambda functions + // and function objects that are called directly from a kernel + // (i.e. the one passed to the single_task or parallel_for functions). + // For all other cases, emit a warning and ignore. + if (auto *A = FD->getAttr()) { + if (ParentFD == SYCLKernel) { + Attrs.insert(A); + } else { + SemaRef.Diag(A->getLocation(), diag::warn_attribute_ignored) << A; + FD->dropAttr(); + } + } + // Propagate the explicit SIMD attribute through call graph - it is used // to distinguish ESIMD code in ESIMD LLVM passes. 
if (KernelBody && KernelBody->hasAttr() && @@ -3204,6 +3218,7 @@ void Sema::MarkDevice(void) { case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz: case attr::Kind::SYCLIntelMaxGlobalWorkDim: case attr::Kind::SYCLIntelNoGlobalWorkOffset: + case attr::Kind::SYCLIntelStallEnable: case attr::Kind::SYCLSimd: { if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody && !KernelBody->getAttr()) { diff --git a/clang/test/CodeGenSYCL/stall_enable.cpp b/clang/test/CodeGenSYCL/stall_enable.cpp new file mode 100644 index 0000000000000..f6d9644dae479 --- /dev/null +++ b/clang/test/CodeGenSYCL/stall_enable.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +class Foo { +public: + [[intel::stall_enable]] void operator()() const {} +}; + +int main() { + q.submit([&](handler &h) { + Foo f; + h.single_task(f); + + h.single_task( + []() [[intel::stall_enable]]{}); + }); + return 0; +} + +// CHECK: define spir_kernel void @"{{.*}}test_kernel1"() #0 {{.*}} !stall_enable ![[NUM5:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}test_kernel2"() #0 {{.*}} !stall_enable ![[NUM5]] +// CHECK: ![[NUM5]] = !{i32 1} diff --git a/clang/test/SemaSYCL/stall_enable.cpp b/clang/test/SemaSYCL/stall_enable.cpp new file mode 100644 index 0000000000000..832ea1808bca3 --- /dev/null +++ b/clang/test/SemaSYCL/stall_enable.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 %s -fsyntax-only -fsycl -internal-isystem %S/Inputs -fsycl-is-device -Wno-sycl-2017-compat -DTRIGGER_ERROR -verify +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -fsyntax-only -ast-dump -Wno-sycl-2017-compat %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +[[intel::stall_enable]] void test() {} //expected-warning{{'stall_enable' attribute ignored}} + +#ifdef TRIGGER_ERROR 
+[[intel::stall_enable(1)]] void bar1() {} // expected-error{{'stall_enable' attribute takes no arguments}} +[[intel::stall_enable]] int N; // expected-error{{'stall_enable' attribute only applies to functions}} +#endif + +struct FuncObj { + [[intel::stall_enable]] void operator()() const {} +}; + +int main() { + q.submit([&](handler &h) { + // CHECK-LABEL: FunctionDecl {{.*}}test_kernel1 + // CHECK: SYCLIntelStallEnableAttr {{.*}} + h.single_task( + FuncObj()); + + // CHECK-LABEL: FunctionDecl {{.*}}test_kernel2 + // CHECK: SYCLIntelStallEnableAttr {{.*}} + h.single_task( + []() [[intel::stall_enable]]{}); + + // CHECK-LABEL: FunctionDecl {{.*}}test_kernel3 + // CHECK-NOT: SYCLIntelStallEnableAttr {{.*}} + h.single_task( + []() { test(); }); + }); + return 0; +}