diff --git a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp index d3451c5f21231..05ac9f4b3ea02 100644 --- a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp @@ -1244,6 +1244,14 @@ PreservedAnalyses SYCLLowerESIMDPass::run(Module &M, size_t SYCLLowerESIMDPass::runOnFunction(Function &F, SmallPtrSet &GVTS) { + // The GPU vector backend currently requires every function called from a + // kernel to be inlined into that kernel, so mark each non-kernel function + // 'alwaysinline'. Skip 'noinline' functions: the IR verifier rejects a + // function that carries both 'noinline' and 'alwaysinline'. + if ((F.getCallingConv() != CallingConv::SPIR_KERNEL) && + !F.hasFnAttribute(Attribute::NoInline)) + F.addFnAttr(Attribute::AlwaysInline); + SmallVector ESIMDIntrCalls; SmallVector ESIMDToErases; diff --git a/llvm/test/SYCLLowerIR/esimd_lower_inline_hint.ll b/llvm/test/SYCLLowerIR/esimd_lower_inline_hint.ll new file mode 100644 index 0000000000000..836d3010fd549 --- /dev/null +++ b/llvm/test/SYCLLowerIR/esimd_lower_inline_hint.ll @@ -0,0 +1,39 @@ +; RUN: opt -LowerESIMD -S < %s | FileCheck %s + +; This test checks that LowerESIMD pass sets the 'alwaysinline' +; attribute for all non-kernel functions. 
+ +define spir_kernel void @EsimdKernel1() { +; CHECK: @EsimdKernel1( +; CHECK-NEXT: call void @foo() +; CHECK-NEXT: call void @bar() + call void @foo() + call void @bar() + ret void +} + +define spir_kernel void @EsimdKernel2() { +; CHECK: @EsimdKernel2( +; CHECK-NEXT: call void @foobar() + call void @foobar() + ret void +} + +define spir_func void @foo() { +; CHECK: @foo() #[[ATTR:[0-9]+]] + ret void +} + +define spir_func void @bar() { +; CHECK: @bar() #[[ATTR]] +; CHECK-NEXT: call void @foobar + call void @foobar() + ret void +} + +define spir_func void @foobar() { +; CHECK: @foobar() #[[ATTR]] + ret void +} + +; CHECK: attributes #[[ATTR]] = { alwaysinline } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index ea1ae9ee535b3..6c9289123daad 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -36,6 +36,7 @@ #include "llvm/Support/SystemUtils.h" #include "llvm/Support/WithColor.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/GlobalDCE.h" #include "llvm/Transforms/InstCombine/InstCombine.h" #include "llvm/Transforms/Scalar.h" @@ -668,30 +669,6 @@ static string_vector saveResultSymbolsLists(string_vector &ResSymbolsLists, } \ } -// Helper function for creating Inliner pass. -// The approach is taken from opt tool. 
-static Pass *createFunctionInliningPassHelper() { - if (OptLevelO0) - return createFunctionInliningPass(0, 0, false); - - if (OptLevelO1) - return createFunctionInliningPass(1, 0, false); - - if (OptLevelO2) - return createFunctionInliningPass(2, 0, false); - - if (OptLevelOs) - return createFunctionInliningPass(2, 1, false); - - if (OptLevelOz) - return createFunctionInliningPass(2, 2, false); - - if (OptLevelO3) - return createFunctionInliningPass(3, 0, false); - - return createFunctionInliningPass(); -} - // When ESIMD code was separated from the regular SYCL code, // we can safely process ESIMD part. // TODO: support options like -debug-pass, -print-[before|after], and others @@ -699,9 +676,8 @@ static void LowerEsimdConstructs(Module &M) { legacy::PassManager MPM; MPM.add(createSYCLLowerESIMDPass()); if (!OptLevelO0) { - // Inlining and SROA passes are required to make - // ESIMD/accessor_gather_scatter.cpp test work. - MPM.add(createFunctionInliningPassHelper()); + // Force-inline all functions marked 'alwaysinline' by the LowerESIMD pass. + MPM.add(createAlwaysInlinerLegacyPass()); MPM.add(createSROAPass()); } MPM.add(createESIMDLowerVecArgPass()); @@ -711,7 +687,7 @@ static void LowerEsimdConstructs(Module &M) { MPM.add(createEarlyCSEPass(true)); MPM.add(createInstructionCombiningPass()); MPM.add(createDeadCodeEliminationPass()); - MPM.add(createFunctionInliningPassHelper()); + // TODO: maybe remove some passes below that don't affect code quality MPM.add(createSROAPass()); MPM.add(createEarlyCSEPass(true)); MPM.add(createInstructionCombiningPass());