Skip to content
13 changes: 13 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1226,6 +1226,19 @@ def SYCLIntelNumSimdWorkItems : InheritableAttr {
let PragmaAttributeSupport = 0;
}

// Declares the SYCL ``[[intel::stall_enable]]`` attribute. No ``Args`` field
// is set, so the attribute is declared as taking no arguments.
def SYCLIntelStallEnable : InheritableAttr {
// Only the C++11-style spelling in the ``intel`` namespace is provided.
let Spellings = [CXX11<"intel","stall_enable">];
// Tied to the SYCL host and SYCL device language options.
let LangOpts = [SYCLIsHost, SYCLIsDevice];
// Restricted to functions; with ErrorDiag, use on any other kind of
// declaration is diagnosed as an error rather than a warning.
let Subjects = SubjectList<[Function], ErrorDiag>;
// Extra member added to the generated attribute class: returns the bare
// attribute name without its namespace.
let AdditionalMembers = [{
static const char *getName() {
return "stall_enable";
}
}];
let Documentation = [SYCLIntelStallEnableAttrDocs];
// Opts the attribute out of ``#pragma clang attribute`` support.
let PragmaAttributeSupport = 0;
}

def SYCLIntelSchedulerTargetFmaxMhz : InheritableAttr {
let Spellings = [CXX11<"intelfpga","scheduler_target_fmax_mhz">,
CXX11<"intel","scheduler_target_fmax_mhz">];
Expand Down
33 changes: 33 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2212,6 +2212,39 @@ device kernel, the attribute is ignored and it is not propagated to a kernel.
}];
}

// User-facing documentation for the ``[[intel::stall_enable]]`` attribute.
def SYCLIntelStallEnableAttrDocs : Documentation {
  let Category = DocCatFunction;
  let Heading = "intel::stall_enable";
  let Content = [{
When applied to a lambda or function call operator (of a function object)
on device, this requests, to the extent possible, that statically-scheduled
clusters handle stalls using a stall-enable signal to freeze computation
within the cluster. This attribute is ignored on the host.

If ``intel::stall_enable`` is applied to a function called from a device
kernel, the attribute is ignored with a warning and it is not propagated to
the kernel.

The ``intel::stall_enable`` attribute takes no arguments and can only be
applied to a function; applying it to any other declaration is an error.

.. code-block:: c++

  class Functor
  {
    [[intel::stall_enable]] void operator()(item<1> item)
    {
      /* kernel code */
    }
  };

  kernel<class kernel_name>(
      []() [[intel::stall_enable]] {
        /* kernel code */
      });

  }];
}

def ReqdWorkGroupSizeAttrDocs : Documentation {
let Category = DocCatFunction;
let Heading = "reqd_work_group_size";
Expand Down
3 changes: 2 additions & 1 deletion clang/include/clang/Basic/AttributeCommonInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,8 @@ class AttributeCommonInfo {
ParsedAttr == AT_SYCLIntelSchedulerTargetFmaxMhz ||
ParsedAttr == AT_SYCLIntelMaxWorkGroupSize ||
ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim ||
ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset)
ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset ||
ParsedAttr == AT_SYCLIntelStallEnable)
return true;

return false;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,12 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
if (A->getEnabled())
Fn->setMetadata("no_global_work_offset", llvm::MDNode::get(Context, {}));
}

if (FD->hasAttr<SYCLIntelStallEnableAttr>()) {
llvm::Metadata *AttrMDArgs[] = {
llvm::ConstantAsMetadata::get(Builder.getInt32(1))};
Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs));
}
}

/// Determine whether the function F ends with a return stmt.
Expand Down
17 changes: 17 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3029,6 +3029,20 @@ static void handleNumSimdWorkItemsAttr(Sema &S, Decl *D,
E);
}

/// Handles the ``[[intel::stall_enable]]`` attribute.
///
/// The attribute is declared without arguments, so an argument list is
/// already rejected with the "takes no arguments" error before this handler
/// runs; no argument-count check is repeated here.
///
/// \param S    Semantic analysis object used to attach the attribute.
/// \param D    Declaration the attribute was written on.
/// \param Attr The parsed attribute being processed.
static void handleStallEnableAttr(Sema &S, Decl *D, const ParsedAttr &Attr) {
  // Nothing is attached to a declaration that is already marked invalid.
  if (D->isInvalidDecl())
    return;

  handleSimpleAttribute<SYCLIntelStallEnableAttr>(S, D, Attr);
}

// Add scheduler_target_fmax_mhz
void Sema::addSYCLIntelSchedulerTargetFmaxMhzAttr(
Decl *D, const AttributeCommonInfo &Attr, Expr *E) {
Expand Down Expand Up @@ -8397,6 +8411,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_SYCLIntelNoGlobalWorkOffset:
handleNoGlobalWorkOffsetAttr(S, D, AL);
break;
case ParsedAttr::AT_SYCLIntelStallEnable:
handleStallEnableAttr(S, D, AL);
break;
case ParsedAttr::AT_VecTypeHint:
handleVecTypeHint(S, D, AL);
break;
Expand Down
15 changes: 15 additions & 0 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,20 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {

if (auto *A = FD->getAttr<SYCLSimdAttr>())
Attrs.insert(A);

// Allow the kernel attribute "stall_enable" only on lambda functions
// and function objects that are called directly from a kernel
// (i.e. the one passed to the single_task or parallel_for functions).
// For all other cases, emit a warning and ignore.
if (auto *A = FD->getAttr<SYCLIntelStallEnableAttr>()) {
if (ParentFD == SYCLKernel) {
Attrs.insert(A);
} else {
SemaRef.Diag(A->getLocation(), diag::warn_attribute_ignored) << A;
FD->dropAttr<SYCLIntelStallEnableAttr>();
}
}

// Propagate the explicit SIMD attribute through call graph - it is used
// to distinguish ESIMD code in ESIMD LLVM passes.
if (KernelBody && KernelBody->hasAttr<SYCLSimdAttr>() &&
Expand Down Expand Up @@ -3204,6 +3218,7 @@ void Sema::MarkDevice(void) {
case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz:
case attr::Kind::SYCLIntelMaxGlobalWorkDim:
case attr::Kind::SYCLIntelNoGlobalWorkOffset:
case attr::Kind::SYCLIntelStallEnable:
case attr::Kind::SYCLSimd: {
if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody &&
!KernelBody->getAttr<SYCLSimdAttr>()) {
Expand Down
26 changes: 26 additions & 0 deletions clang/test/CodeGenSYCL/stall_enable.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

// Function object whose call operator carries [[intel::stall_enable]]; an
// instance of it is passed to single_task as test_kernel1 in main().
class Foo {
public:
[[intel::stall_enable]] void operator()() const {}
};

// Submits two kernels whose callables carry [[intel::stall_enable]]: a
// function object (test_kernel1) and a lambda (test_kernel2). The FileCheck
// patterns at the end of the file match the IR emitted for both kernels.
int main() {
q.submit([&](handler &h) {
// Attribute on the call operator of a function object.
Foo f;
h.single_task<class test_kernel1>(f);

// Attribute written directly on a lambda.
h.single_task<class test_kernel2>(
[]() [[intel::stall_enable]]{});
});
return 0;
}

// CHECK: define spir_kernel void @"{{.*}}test_kernel1"() #0 {{.*}} !stall_enable ![[NUM5:[0-9]+]]
// CHECK: define spir_kernel void @"{{.*}}test_kernel2"() #0 {{.*}} !stall_enable ![[NUM5]]
// CHECK: ![[NUM5]] = !{i32 1}
38 changes: 38 additions & 0 deletions clang/test/SemaSYCL/stall_enable.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// RUN: %clang_cc1 %s -fsyntax-only -fsycl -internal-isystem %S/Inputs -fsycl-is-device -Wno-sycl-2017-compat -DTRIGGER_ERROR -verify
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -fsyntax-only -ast-dump -Wno-sycl-2017-compat %s | FileCheck %s

#include "sycl.hpp"

using namespace cl::sycl;
queue q;

// Free function carrying the attribute. It is only called from inside the
// test_kernel3 lambda in main(), i.e. it is not itself the callable handed to
// single_task; the annotation on its line covers the resulting diagnostic.
[[intel::stall_enable]] void test() {} //expected-warning{{'stall_enable' attribute ignored}}

// Misuse cases, compiled only when TRIGGER_ERROR is defined (the first RUN
// line passes -DTRIGGER_ERROR): an argument list on the attribute, and the
// attribute written on a variable instead of a function.
#ifdef TRIGGER_ERROR
[[intel::stall_enable(1)]] void bar1() {} // expected-error{{'stall_enable' attribute takes no arguments}}
[[intel::stall_enable]] int N; // expected-error{{'stall_enable' attribute only applies to functions}}
#endif

// Function object whose call operator carries [[intel::stall_enable]]; a
// temporary of this type is passed to single_task as test_kernel1 in main().
struct FuncObj {
[[intel::stall_enable]] void operator()() const {}
};

// Submits three kernels. The first two pass a callable that itself carries
// [[intel::stall_enable]] (function object, then lambda); the third passes a
// plain lambda that merely calls the attributed free function test(). The
// FileCheck directives next to each call are matched against the AST dump
// produced by the second RUN line.
int main() {
q.submit([&](handler &h) {
// CHECK-LABEL: FunctionDecl {{.*}}test_kernel1
// CHECK: SYCLIntelStallEnableAttr {{.*}}
h.single_task<class test_kernel1>(
FuncObj());

// CHECK-LABEL: FunctionDecl {{.*}}test_kernel2
// CHECK: SYCLIntelStallEnableAttr {{.*}}
h.single_task<class test_kernel2>(
[]() [[intel::stall_enable]]{});

// CHECK-LABEL: FunctionDecl {{.*}}test_kernel3
// CHECK-NOT: SYCLIntelStallEnableAttr {{.*}}
h.single_task<class test_kernel3>(
[]() { test(); });
});
return 0;
}