Skip to content

Commit

Permalink
[ESIMD] Add set_kernel_properties API and use_double_grf property.
Browse files Browse the repository at this point in the history
This patch:
1) Adds esimd::set_kernel_properties API with the single supported property
   esimd::kernel_properties::use_double_grf, which lets the compiler know that
   the calling kernel needs to run in "double GRF" mode - more registers per
   thread at the expense of fewer H/W threads.
   This is a temporary API until generic SYCL support for kernel properties
   is implemented:
   https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc
2) Provides "lowering" of this API by the LowerESIMD.cpp, which marks such
   kernels with "esimd-double-grf" function attribute.
3) Implements new "dimension" of device code splitting in sycl-post-link:
   functions with and without "esimd-double-grf" attribute go to different
   modules. Device binary images resulting from "double-grf" modules are
   assigned the "isDoubleGRFEsimdImage" property.
4) Updates runtime to add "-doubleGRF" option when JITting SPIRV binaries
   with the "isDoubleGRFEsimdImage" property.
5) Fixes sycl-post-link bug in ModuleSplitter.cpp:extractSubModule, where
   Function objects in the entry point list were not replaced with new
   Function objects in the cloned Module. This led to a corrupted symbol file
   in some cases.

Signed-off-by: Konstantin S Bobrovsky <konstantin.s.bobrovsky@intel.com>
  • Loading branch information
kbobrovs committed May 23, 2022
1 parent 3e1c1bf commit 2e1f97b
Show file tree
Hide file tree
Showing 9 changed files with 435 additions and 103 deletions.
155 changes: 115 additions & 40 deletions llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ class SYCLLowerESIMDLegacyPass : public ModulePass {
};
} // namespace

// Name of the string function attribute that this file adds to ESIMD kernels
// from which a set_kernel_properties call with the use_double_grf property is
// reachable.
constexpr char ATTR_DOUBLE_GRF[] = "esimd-double-grf";

// Definition of the legacy pass' static ID member, followed by registration of
// the pass through INITIALIZE_PASS under the name "LowerESIMD".
char SYCLLowerESIMDLegacyPass::ID = 0;
INITIALIZE_PASS(SYCLLowerESIMDLegacyPass, "LowerESIMD",
"Lower constructs specific to Close To Metal", false, false)
Expand Down Expand Up @@ -899,52 +901,30 @@ static inline llvm::Metadata *getMD(llvm::Value *V) {
return llvm::ValueAsMetadata::get(V);
}

/// Updates genx.kernels metadata attribute \p MD for the given function \p F.
/// The value of the attribute is updated only if the new value \p NewVal is
/// bigger than what is already stored in the attribute.
// TODO: 1) In general this function is supposed to handle intrinsics
// translated into kernel's metadata. So, the primary/intended usage model is
// when such intrinsics are called from kernels.
// 2) For now such intrinsics are also handled in functions directly called
// from kernels and are translated into those caller kernels' metadata, even
// though such behaviour is not fully specified/documented.
// 3) This code (or the code in FE) must verify that slm_init or other such
// intrinsic is not called from another module because kernels in that other
// module would not get updated meta data attributes.
static void updateGenXMDNodes(llvm::Function *F, genx::KernelMDOp MD,
uint64_t NewVal) {
llvm::NamedMDNode *GenXKernelMD =
F->getParent()->getNamedMetadata(GENX_KERNEL_METADATA);
assert(GenXKernelMD && "invalid genx.kernels metadata");
// Tells whether \p F is an ESIMD kernel, i.e. it uses the SPIR_KERNEL calling
// convention and has "sycl_explicit_simd" metadata attached.
static bool isESIMDKernel(const Function &F) {
  if (F.getCallingConv() != CallingConv::SPIR_KERNEL)
    return false;
  return F.getMetadata("sycl_explicit_simd") != nullptr;
}

template <class CallGraphNodeF>
static void traverseCallgraphUp(llvm::Function *F, CallGraphNodeF ApplyF) {
SmallPtrSet<Function *, 32> FunctionsVisited;
SmallVector<Function *, 32> Worklist{F};

while (!Worklist.empty()) {
Function *CurF = Worklist.pop_back_val();
FunctionsVisited.insert(CurF);

// Update the meta data attribute for the current function.
for (auto Node : GenXKernelMD->operands()) {
if (Node->getNumOperands() <= MD ||
getVal(Node->getOperand(genx::KernelMDOp::FunctionRef)) != CurF)
continue;

llvm::Value *Old = getVal(Node->getOperand(MD));
uint64_t OldVal = cast<llvm::ConstantInt>(Old)->getZExtValue();
if (OldVal < NewVal) {
llvm::Value *New = llvm::ConstantInt::get(Old->getType(), NewVal);
Node->replaceOperandWith(MD, getMD(New));
}
}
// Apply the action function.
ApplyF(CurF);

// Update all callers as well.
for (auto It = CurF->use_begin(); It != CurF->use_end(); It++) {
auto FCall = It->getUser();
if (!isa<CallInst>(FCall))
llvm::report_fatal_error(
llvm::Twine(__FILE__ " ") +
"Found an intrinsic violating assumption on usage from a kernel or "
"a func directly called from a kernel");
"Function use other than call detected while traversing call\n"
"graph starting from kernel property mark-up intrinsic.");

auto FCaller = cast<CallInst>(FCall)->getFunction();
if (!FunctionsVisited.count(FCaller))
Expand All @@ -953,6 +933,68 @@ static void updateGenXMDNodes(llvm::Function *F, genx::KernelMDOp MD,
}
}

// A functor which updates ESIMD kernel's uint64_t metadata in case it is less
// than the given one. Used in callgraph traversal to update nbarriers or SLM
// size metadata. Update is performed by the '()' operator and happens only
// when given function matches one of the kernels - thus, only reachable kernels
// are updated. Applying the functor to the same function more than once is
// harmless: a node whose value is already >= NewVal is left untouched.
// TODO: 1) In general this function is supposed to handle intrinsics
// translated into kernel's metadata. So, the primary/intended usage model is
// when such intrinsics are called from kernels.
// 2) For now such intrinsics are also handled in functions directly called
// from kernels and are translated into those caller kernels' metadata, even
// though such behaviour is not fully specified/documented.
// 3) This code (or the code in FE) must verify that slm_init or other such
// intrinsic is not called from another module because kernels in that other
// module would not get updated meta data attributes.
struct UpdateUint64MetaDataToMaxValue {
  Module &M;
  // The uint64_t metadata key to update.
  genx::KernelMDOp Key;
  // The new metadata value. Must be greater than the old for update to happen.
  uint64_t NewVal;
  // Pre-selected nodes from GENX_KERNEL_METADATA which can only potentially be
  // updated.
  SmallVector<MDNode *, 4> CandidatesToUpdate;

  // Collects the GENX_KERNEL_METADATA nodes of \p M which have operand \p Key
  // and whose current value of that operand is less than \p NewVal.
  UpdateUint64MetaDataToMaxValue(Module &M, genx::KernelMDOp Key,
                                 uint64_t NewVal)
      : M(M), Key(Key), NewVal(NewVal) {
    // Pre-select nodes for update to do less work in the '()' operator.
    llvm::NamedMDNode *GenXKernelMD = M.getNamedMetadata(GENX_KERNEL_METADATA);
    assert(GenXKernelMD && "invalid genx.kernels metadata");
    for (auto Node : GenXKernelMD->operands()) {
      // Nodes too short to contain the requested operand are not candidates.
      if (Node->getNumOperands() <= static_cast<unsigned>(Key))
        continue;

      llvm::Value *Old = getVal(Node->getOperand(Key));
      uint64_t OldVal = cast<llvm::ConstantInt>(Old)->getZExtValue();
      if (OldVal < NewVal)
        CandidatesToUpdate.push_back(Node);
    }
  }

  // Raises the metadata operand to NewVal in every candidate node whose
  // FunctionRef operand is \p F.
  void operator()(Function *F) {
    for (auto Node : CandidatesToUpdate) {
      assert(Node->getNumOperands() > static_cast<unsigned>(Key));

      if (getVal(Node->getOperand(genx::KernelMDOp::FunctionRef)) != F)
        continue;

      llvm::Value *Old = getVal(Node->getOperand(Key));
      uint64_t OldVal = cast<llvm::ConstantInt>(Old)->getZExtValue();
      // The stored value may have been raised since the candidates were
      // selected (e.g. by an earlier application of this functor to the same
      // function). Never lower it and never rewrite an equal value.
      if (OldVal >= NewVal)
        continue;

      llvm::Value *New = llvm::ConstantInt::get(Old->getType(), NewVal);
      Node->replaceOperandWith(Key, getMD(New));
    }
  }
};

// This function sets/updates VCSLMSize attribute to the kernels
// calling this intrinsic initializing SLM memory.
static void translateSLMInit(CallInst &CI) {
Expand All @@ -964,7 +1006,9 @@ static void translateSLMInit(CallInst &CI) {

uint64_t NewVal = cast<llvm::ConstantInt>(ArgV)->getZExtValue();
assert(NewVal != 0 && "zero slm bytes being requested");
updateGenXMDNodes(F, genx::KernelMDOp::SLMSize, NewVal);
UpdateUint64MetaDataToMaxValue SetMaxSLMSize{
*F->getParent(), genx::KernelMDOp::SLMSize, NewVal};
traverseCallgraphUp(F, SetMaxSLMSize);
}

// This function sets/updates VCNamedBarrierCount attribute to the kernels
Expand All @@ -979,7 +1023,9 @@ static void translateNbarrierInit(CallInst &CI) {

auto NewVal = cast<llvm::ConstantInt>(ArgV)->getZExtValue();
assert(NewVal != 0 && "zero named barrier count being requested");
updateGenXMDNodes(F, genx::KernelMDOp::NBarrierCnt, NewVal);
UpdateUint64MetaDataToMaxValue SetMaxNBarrierCnt{
*F->getParent(), genx::KernelMDOp::NBarrierCnt, NewVal};
traverseCallgraphUp(F, SetMaxNBarrierCnt);
}

static void translatePackMask(CallInst &CI) {
Expand Down Expand Up @@ -1099,6 +1145,33 @@ static void translateGetSurfaceIndex(CallInst &CI) {
CI.replaceAllUsesWith(SI);
}

// Kernel property identifiers. Should match ones in
// sycl/include/sycl/ext/intel/experimental/esimd/kernel_properties.hpp
// These are the values compared against the constant first argument of a
// set_kernel_properties marker call when it is lowered in this file.
enum property_ids { use_double_grf = 0 };

// Lowers a set_kernel_properties marker call \p CI.
// The first call argument must be an integral constant holding one of the
// property_ids values; anything else is reported as a fatal error.
// For use_double_grf, the call graph is walked upwards from the function
// containing \p CI and every ESIMD kernel encountered gets the
// ATTR_DOUBLE_GRF function attribute. The call itself is not erased here.
static void translateSetKernelProperties(CallInst &CI) {
  auto *F = CI.getFunction();
  auto *ArgV = CI.getArgOperand(0);
  if (!isa<ConstantInt>(ArgV))
    llvm::report_fatal_error(
        llvm::Twine(__FILE__ " ") +
        "integral constant is expected for set_kernel_properties");
  uint64_t PropID = cast<llvm::ConstantInt>(ArgV)->getZExtValue();

  switch (PropID) {
  case property_ids::use_double_grf:
    traverseCallgraphUp(F, [](Function *GraphNode) {
      // Only kernels are marked; intermediate functions are just traversed.
      if (!isESIMDKernel(*GraphNode)) {
        return;
      }
      GraphNode->addFnAttr(ATTR_DOUBLE_GRF);
    });
    break;
  default:
    // Diagnose unknown ids in all build modes: an assert alone would let
    // assert-disabled builds silently drop the requested property.
    llvm::report_fatal_error(
        llvm::Twine(__FILE__ " ") +
        "invalid property id for set_kernel_properties: " +
        llvm::Twine(PropID));
  }
}

// Newly created GenX intrinsic might have different return type than expected.
// This helper function creates cast operation from GenX intrinsic return type
// to currently expected. Returns pointer to created cast instruction if it
Expand Down Expand Up @@ -1514,8 +1587,7 @@ void generateKernelMetadata(Module &M) {

for (auto &F : M.functions()) {
// Skip non-SIMD kernels.
if (F.getCallingConv() != CallingConv::SPIR_KERNEL ||
F.getMetadata("sycl_explicit_simd") == nullptr)
if (!isESIMDKernel(F))
continue;

// Metadata node containing N i32s, where N is the number of kernel
Expand Down Expand Up @@ -1708,15 +1780,14 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,

// process ESIMD builtins that go through special handling instead of
// the translation procedure
// TODO FIXME slm_init should be made top-level __esimd_slm_init

if (Name.startswith("__esimd_slm_init") &&
isa<ConstantInt>(CI->getArgOperand(0))) {
// tag the kernel with meta-data SLMSize, and remove this builtin
translateSLMInit(*CI);
ToErase.push_back(CI);
continue;
}

if (Name.startswith("__esimd_nbarrier_init")) {
translateNbarrierInit(*CI);
ToErase.push_back(CI);
Expand Down Expand Up @@ -1748,12 +1819,16 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F,
continue;
}
}

if (Name.startswith("__esimd_get_surface_index")) {
translateGetSurfaceIndex(*CI);
ToErase.push_back(CI);
continue;
}
if (Name.startswith("__esimd_set_kernel_properties")) {
translateSetKernelProperties(*CI);
ToErase.push_back(CI);
continue;
}

if (Name.empty() || !Name.startswith(ESIMD_INTRIN_PREF1))
continue;
Expand Down
69 changes: 69 additions & 0 deletions llvm/test/tools/sycl-post-link/sycl-esimd/sycl-esimd-double-grf.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
; This test checks handling of the
; set_kernel_properties(kernel_properties::use_double_grf);
; by the post-link-tool:
; - ESIMD/SYCL splitting happens as usual
; - ESIMD module is further split into callgraphs for entry points requesting
; "double GRF" and callgraphs for entry points which are not
; - Compiler adds 'isDoubleGRFEsimdImage' property to the ESIMD device binary
; images requesting "double GRF"

; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S %s -o %t.table
; RUN: FileCheck %s -input-file=%t.table
; RUN: FileCheck %s -input-file=%t_esimd_x2grf_0.ll --check-prefixes CHECK-ESIMD-2xGRF-IR
; RUN: FileCheck %s -input-file=%t_esimd_x2grf_0.prop --check-prefixes CHECK-ESIMD-2xGRF-PROP
; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-SYM
; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-SYM
; RUN: FileCheck %s -input-file=%t_esimd_x2grf_0.sym --check-prefixes CHECK-ESIMD-2xGRF-SYM

; CHECK: [Code|Properties|Symbols]
; CHECK: {{.*}}esimd_0.ll|{{.*}}esimd_0.prop|{{.*}}esimd_0.sym
; CHECK: {{.*}}esimd_x2grf_0.ll|{{.*}}esimd_x2grf_0.prop|{{.*}}esimd_x2grf_0.sym
; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym

; CHECK-ESIMD-2xGRF-PROP: isEsimdImage=1|1
; CHECK-ESIMD-2xGRF-PROP: isDoubleGRFEsimdImage=1|1

; CHECK-SYCL-SYM: __SYCL_kernel
; CHECK-SYCL-SYM-EMPTY:

; CHECK-ESIMD-SYM: __ESIMD_kernel
; CHECK-ESIMD-SYM-EMPTY:

; CHECK-ESIMD-2xGRF-SYM: __ESIMD_double_grf_kernel
; CHECK-ESIMD-2xGRF-SYM-EMPTY:

; ModuleID = 'double_grf.bc'
source_filename = "llvm-link"
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"

; Regular SYCL kernel: a spir_kernel without !sycl_explicit_simd metadata.
; The symbol-file checks above expect it to be the only entry of %t_0.sym.
define weak_odr dso_local spir_kernel void @__SYCL_kernel() #0 {
entry:
ret void
}

; ESIMD kernel (has !sycl_explicit_simd metadata) that does not reach the
; kernel property marker. The symbol-file checks above expect it to be the only
; entry of %t_esimd_0.sym.
define weak_odr dso_local spir_kernel void @__ESIMD_kernel() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 {
entry:
ret void
}

; Non-kernel helper which contains the kernel property marker call with
; property id 0. It is called from the double-GRF kernel below, so the marker
; is reached indirectly rather than from the kernel body itself.
define dso_local spir_func void @_Z17double_grf_markerv() {
entry:
call spir_func void @_Z29__esimd_set_kernel_propertiesi(i32 noundef 0)
; -- Check that ESIMD lowering removed the marker call above:
; CHECK-ESIMD-2xGRF-IR-NOT: {{.*}} @_Z29__esimd_set_kernel_propertiesi
ret void
}

declare dso_local spir_func void @_Z29__esimd_set_kernel_propertiesi(i32 noundef)

; ESIMD kernel (has !sycl_explicit_simd metadata) which calls the marker helper
; above. The symbol-file checks above expect it to be the only entry of
; %t_esimd_x2grf_0.sym.
define weak_odr dso_local spir_kernel void @__ESIMD_double_grf_kernel() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 {
entry:
call spir_func void @_Z17double_grf_markerv()
ret void
}

attributes #0 = { "sycl-module-id"="a.cpp" }

!0 = !{}
!1 = !{i32 1}
Loading

0 comments on commit 2e1f97b

Please sign in to comment.