forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 60
/
Copy pathAMDGPULowerKernelCalls.cpp
111 lines (96 loc) · 3.34 KB
/
AMDGPULowerKernelCalls.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
//===-- AMDGPULowerKernelCalls.cpp - Fix kernel calls ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// Replace calls to OpenCL kernels with calls to equivalent non-kernel
/// functions.
///
/// In OpenCL, a kernel may call another kernel as if it were a non-kernel
/// function. However, kernels and functions have different ABIs. To fix this,
/// when we encounter a call to a kernel A, we copy the body of A into a new
/// non-kernel function fA. All calls to A are then redirected to fA.
///
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
namespace {

/// Module pass whose work is done in runOnModule(); see the file header for
/// the transformation it performs.
class AMDGPULowerKernelCalls : public ModulePass {
public:
  /// Static pass ID member; handed to the ModulePass base constructor.
  static char ID;

  explicit AMDGPULowerKernelCalls();

  /// Human-readable name of this pass.
  StringRef getPassName() const override { return "AMDGPU Lower Kernel Calls"; }

private:
  /// Entry point invoked on each module; returns true if the module changed.
  bool runOnModule(Module &M) override;
};

} // end anonymous namespace
// Storage for the pass's static ID member; the constructor passes it to the
// ModulePass base class.
char AMDGPULowerKernelCalls::ID = 0;
namespace llvm {
// Declared here so the constructor below can call it.
// NOTE(review): presumably defined by the INITIALIZE_PASS invocation further
// down in this file -- confirm against llvm/PassSupport.h.
void initializeAMDGPULowerKernelCallsPass(PassRegistry &);
// Factory for this pass: returns a newly heap-allocated instance as a raw
// pointer. NOTE(review): the caller is presumably expected to take ownership
// (e.g. by adding it to a pass manager) -- confirm at the call sites.
ModulePass *createAMDGPULowerKernelCallsPass() {
return new AMDGPULowerKernelCalls();
}
} // end namespace llvm
// Exposes the pass's static ID under the name llvm::AMDGPULowerKernelCallsID
// as a reference, so code outside this file can refer to it without seeing
// the class, which lives in an anonymous namespace.
char &llvm::AMDGPULowerKernelCallsID = AMDGPULowerKernelCalls::ID;
// Pass registration. The first string is the pass's short argument name and
// the second is its description.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags -- confirm against llvm/PassSupport.h.
INITIALIZE_PASS(
AMDGPULowerKernelCalls, "amdgpu-lower-kernel-calls",
"Lower calls to kernel functions into non-kernel function calls.", false,
false)
/// Constructs the pass with its static ID and runs the pass initializer
/// against the global pass registry.
AMDGPULowerKernelCalls::AMDGPULowerKernelCalls() : ModulePass(ID) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeAMDGPULowerKernelCallsPass(Registry);
}
/// Renames \p FBody to "__amdgpu_<name of FKernel>_kernel_body".
///
/// \param FBody   function to rename.
/// \param FKernel kernel whose current name is embedded in the new name.
static void setNameForBody(Function *FBody, const Function &FKernel) {
  // The name is concatenated lazily; setName() materializes it.
  FBody->setName("__amdgpu_" + FKernel.getName() + "_kernel_body");
}
/// Creates the non-kernel counterpart of kernel \p F in the same module.
///
/// If \p F has no body, a fresh externally-linked function with the same
/// function type is created; otherwise the whole function is cloned. In both
/// cases the result uses the C calling convention and is named via
/// setNameForBody().
///
/// \param F the kernel to copy.
/// \returns the newly created function.
static Function *cloneKernel(Function &F) {
  Function *Body = nullptr;
  if (F.empty()) {
    // Nothing to copy: create a function with a matching signature only.
    Body = Function::Create(F.getFunctionType(), Function::ExternalLinkage, "",
                            F.getParent());
  } else {
    // The value map is required by CloneFunction; its contents are not used.
    ValueToValueMapTy VMap;
    Body = CloneFunction(&F, VMap);
  }

  Body->setCallingConv(CallingConv::C);

  // A copied definition is only referenced by the call sites this pass
  // rewrites, so there are no external references and internal linkage can be
  // forced.
  if (!Body->isDeclaration()) {
    Body->setVisibility(GlobalValue::DefaultVisibility);
    Body->setLinkage(GlobalValue::InternalLinkage);
  }

  setNameForBody(Body, F);
  return Body;
}
/// Redirects every direct call of an AMDGPU kernel to a non-kernel copy of
/// that kernel.
///
/// For each function in \p M that uses the AMDGPU_KERNEL calling convention
/// and is the callee of at least one call, a copy is made with cloneKernel()
/// and every call site of the kernel is updated to call the copy using the C
/// calling convention. Uses of the kernel that are not the callee operand of a
/// call (for example, the kernel passed as an argument) are left untouched.
///
/// \param M the module to transform.
/// \returns true if any call site was rewritten.
bool AMDGPULowerKernelCalls::runOnModule(Module &M) {
  // True when U is the callee operand of a call-like instruction.
  auto IsDirectCall = [](const Use &U) {
    const auto *CB = dyn_cast<CallBase>(U.getUser());
    return CB && CB->isCallee(&U);
  };

  bool Changed = false;
  for (Function &F : M) {
    if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL)
      continue;

    // Only kernels that are actually called need a non-kernel copy.
    if (!llvm::any_of(F.uses(), IsDirectCall))
      continue;

    // Create the copy *before* walking the use list. If the kernel calls
    // itself, the copy contains call sites that still target F. Cloning up
    // front ensures those call sites are already on F's use list when the walk
    // starts, so they are redirected as well instead of depending on where
    // newly added uses land relative to the iteration position.
    Function *FBody = cloneKernel(F);
    Changed = true;

    // Rewriting the callee removes the use from F's use list, so advance the
    // iterator before touching each use.
    for (Use &U : make_early_inc_range(F.uses())) {
      if (!IsDirectCall(U))
        continue;
      auto *Call = cast<CallBase>(U.getUser());
      Call->setCalledFunction(FBody);
      Call->setCallingConv(CallingConv::C);
    }
  }
  return Changed;
}