Skip to content

Commit e8b255d

Browse files
fhossein-quicrutkoorBrendon CahoonabhikranSumanth Gundapaneni
authored
Hexagon QFP Optimizer (#163843)
Co-authored-by: Rahul Utkoor <quic_rutkoor@quicinc.com> Co-authored-by: Brendon Cahoon <bcahoon@quicinc.com> Co-authored-by: abhikran <abhikran@codeaurora.org> Co-authored-by: Sumanth Gundapaneni <sgundapa@quicinc.com> Co-authored-by: Ikhlas Ajbar <iajbar@quicinc.com> Co-authored-by: Anirudh Sundar <quic_sanirudh@quicinc.com> Co-authored-by: Yashas Andaluri <quic_yandalur@quicinc.com> Co-authored-by: quic-santdas <quic_santdas@quicinc.com>
1 parent cd67ca2 commit e8b255d

File tree

12 files changed

+677
-0
lines changed

12 files changed

+677
-0
lines changed

llvm/lib/Target/Hexagon/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen
5454
HexagonOptAddrMode.cpp
5555
HexagonOptimizeSZextends.cpp
5656
HexagonPeephole.cpp
57+
HexagonQFPOptimizer.cpp
5758
HexagonRDFOpt.cpp
5859
HexagonRegisterInfo.cpp
5960
HexagonSelectionDAGInfo.cpp

llvm/lib/Target/Hexagon/Hexagon.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &);
6767
void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &);
6868
void initializeHexagonVectorPrintPass(PassRegistry &);
6969

70+
void initializeHexagonQFPOptimizerPass(PassRegistry &);
71+
7072
Pass *createHexagonLoopIdiomPass();
7173
Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
7274

@@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass();
112114
FunctionPass *createHexagonVectorPrint();
113115
FunctionPass *createHexagonVExtract();
114116
FunctionPass *createHexagonExpandCondsets();
117+
FunctionPass *createHexagonQFPOptimizer();
115118

116119
} // end namespace llvm;
117120

Lines changed: 334 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,334 @@
1+
//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions
2+
// optimizer ------------------===//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// Basic infrastructure for optimizing intermediate conversion instructions
11+
// generated while performing vector floating point operations.
12+
// Currently run at the start of code generation for Hexagon, cleans
13+
// up redundant conversion instructions and replaces the uses of conversion
14+
// with appropriate machine operand. Liveness is preserved after this pass.
15+
//
16+
// @note: The redundant conversion instructions are not eliminated in this pass.
17+
// In this pass, we are only trying to replace the uses of conversion
18+
// instructions with its appropriate QFP instruction. We are leaving the job to
19+
// Dead instruction Elimination pass to remove redundant conversion
20+
// instructions.
21+
//
22+
// Brief overview of working of this QFP optimizer.
23+
// This version of Hexagon QFP optimizer basically iterates over each
24+
// instruction, checks whether it belongs to the Hexagon floating point HVX
25+
// arithmetic instruction category(Add, Sub, Mul). And then it finds the unique
26+
// definition for the machine operands corresponding to the instruction.
27+
//
28+
// Example:
29+
// MachineInstruction *MI be the HVX vadd instruction
30+
// MI -> $v0 = V6_vadd_sf $v1, $v2
31+
// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
32+
// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
33+
//
34+
// In the above example, DefMI1 and DefMI2 gives the unique definitions
35+
// corresponding to the operands($v1 and $v2 respectively) of instruction MI.
36+
//
37+
// If neither of the definitions is a conversion instruction(V6_vconv_sf_qf32,
38+
// V6_vconv_hf_qf16), then it will skip optimizing the current instruction and
39+
// iterates over next instruction.
40+
//
41+
// If one of the definitions is a conversion instruction then our pass will replace
42+
// the arithmetic instruction with its corresponding mix variant.
43+
// In the above example, if $v1 is conversion instruction
44+
// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
45+
// After Transformation:
46+
// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
47+
//
48+
// If both the definitions are conversion instructions then the instruction will
49+
// be replaced with its qf variant
50+
// In the above example, if $v1 and $v2 are conversion instructions
51+
// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
52+
// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
53+
// After Transformation:
54+
// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
55+
// with $v4)
56+
//
57+
// Currently, in this pass, we are not handling the case when the definitions
58+
// are PHI inst.
59+
//
60+
//===----------------------------------------------------------------------===//
61+
#include <unordered_set>
62+
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
63+
64+
#include "Hexagon.h"
65+
#include "HexagonInstrInfo.h"
66+
#include "HexagonSubtarget.h"
67+
#include "llvm/ADT/SmallVector.h"
68+
#include "llvm/ADT/StringRef.h"
69+
#include "llvm/CodeGen/MachineBasicBlock.h"
70+
#include "llvm/CodeGen/MachineFunction.h"
71+
#include "llvm/CodeGen/MachineFunctionPass.h"
72+
#include "llvm/CodeGen/MachineInstr.h"
73+
#include "llvm/CodeGen/MachineOperand.h"
74+
#include "llvm/CodeGen/Passes.h"
75+
#include "llvm/Pass.h"
76+
#include "llvm/Support/CommandLine.h"
77+
#include "llvm/Support/Debug.h"
78+
#include "llvm/Support/raw_ostream.h"
79+
#include <map>
80+
#include <vector>
81+
82+
#define DEBUG_TYPE "hexagon-qfp-optimizer"
83+
84+
using namespace llvm;
85+
86+
// Command-line flag "disable-qfp-opt" (default: false). When set,
// HexagonQFPOptimizer::runOnMachineFunction returns immediately without
// touching the function.
cl::opt<bool>
87+
DisableQFOptimizer("disable-qfp-opt", cl::init(false),
88+
cl::desc("Disable optimization of Qfloat operations."));
89+
90+
namespace {
91+
// Opcode rewrite table used by the QFP optimizer. Each entry maps an opcode to
// the opcode chosen when one more of its sources is fed directly from a
// qf32/qf16 value instead of a converted sf/hf value:
//   IEEE form (e.g. V6_vadd_sf) -> "mix" form (V6_vadd_qf32_mix)
//   "mix" form                  -> full qf form (V6_vadd_qf32)
// optimizeQfp follows one link when a single source is a conversion and two
// links when both sources are conversions. V6_vmpy_qf32_sf is the exception:
// it has no mix entry and maps straight to V6_vmpy_qf32, so it is only
// rewritten when both sources are conversions.
const std::map<unsigned short, unsigned short> QFPInstMap{
92+
{Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},
93+
{Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},
94+
{Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},
95+
{Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},
96+
{Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},
97+
{Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},
98+
{Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},
99+
{Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},
100+
{Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},
101+
{Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
102+
{Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
103+
{Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
104+
{Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
105+
} // namespace
106+
107+
namespace llvm {
108+
109+
// Factory for the pass; defined further down in this file.
FunctionPass *createHexagonQFPOptimizer();
110+
// Pass-registry initializer for HexagonQFPOptimizer; called from
// LLVMInitializeHexagonTarget.
void initializeHexagonQFPOptimizerPass(PassRegistry &);
111+
112+
} // namespace llvm
113+
114+
namespace {
115+
116+
struct HexagonQFPOptimizer : public MachineFunctionPass {
117+
public:
118+
static char ID;
119+
120+
HexagonQFPOptimizer() : MachineFunctionPass(ID) {}
121+
122+
bool runOnMachineFunction(MachineFunction &MF) override;
123+
124+
bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
125+
126+
StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
127+
128+
void getAnalysisUsage(AnalysisUsage &AU) const override {
129+
AU.setPreservesCFG();
130+
MachineFunctionPass::getAnalysisUsage(AU);
131+
}
132+
133+
private:
134+
const HexagonSubtarget *HST = nullptr;
135+
const HexagonInstrInfo *HII = nullptr;
136+
const MachineRegisterInfo *MRI = nullptr;
137+
};
138+
139+
char HexagonQFPOptimizer::ID = 0;
140+
} // namespace
141+
142+
// Registers the pass under the command-line name "hexagon-qfp-optimizer" with
// HEXAGON_QFP_OPTIMIZER as its description. The trailing `false, false` are
// the macro's CFG-only and is-analysis flags.
INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer",
143+
HEXAGON_QFP_OPTIMIZER, false, false)
144+
145+
// Factory used by the Hexagon pass pipeline (addPass in
// HexagonPassConfig::addInstSelector). Returns a newly allocated pass
// instance as a raw pointer; presumably the pass manager takes ownership.
FunctionPass *llvm::createHexagonQFPOptimizer() {
146+
return new HexagonQFPOptimizer();
147+
}
148+
149+
/// Try to fold qf-to-IEEE conversions that feed the HVX arithmetic instruction
/// \p MI into the instruction itself.
///
/// The reaching definitions of MI's two source operands are inspected:
///  - Case 1: both are the same kind of conversion (V6_vconv_sf_qf32 or
///    V6_vconv_hf_qf16): emit the full qf variant on the conversion sources.
///  - Case 2: only the left source is a conversion: emit the next variant from
///    QFPInstMap with the conversion source as first operand.
///  - Case 3: only the right source is a conversion: same as case 2, but the
///    operands are swapped so the qf value comes first. Subtractions are
///    therefore rejected in this case.
///
/// No rewrite is done when a conversion's source is defined as an HvxWR
/// (vector pair) register, and the single-conversion cases skip PHI
/// definitions and V6_vmpy_qf32_sf.
///
/// \param MI  Candidate instruction; its opcode must be a key of QFPInstMap.
/// \param MBB Block containing \p MI; the replacement is inserted before MI.
/// \returns true if a replacement instruction was inserted. \p MI is left in
///          place and must be erased by the caller. When false is returned no
///          instruction or operand flag has been modified.
bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
                                      MachineBasicBlock *MBB) {
  // QFP arithmetic needs one result and two source operands.
  if (MI->getNumOperands() < 3)
    return false;

  const unsigned Opc = MI->getOpcode();
  const auto NextIt = QFPInstMap.find(Opc);
  if (NextIt == QFPInstMap.end())
    return false;
  // Variant of MI that takes one more qf source than MI does.
  const unsigned short NextTy = NextIt->second;

  // Unique definition of a source operand; only virtual registers have one.
  auto GetVRegDef = [this](const MachineOperand &MO) -> MachineInstr * {
    if (!MO.isReg() || !MO.getReg().isVirtual())
      return nullptr;
    return MRI->getVRegDef(MO.getReg());
  };

  // Get the reaching defs of MI, DefMI1 and DefMI2.
  MachineInstr *DefMI1 = GetVRegDef(MI->getOperand(1));
  MachineInstr *DefMI2 = GetVRegDef(MI->getOperand(2));
  if (!DefMI1 || !DefMI2)
    return false;

  LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
             DefMI2->dump());

  // True when the register read by Conv (its operand 1) is defined by an
  // instruction whose result is in the HvxWR register class.
  auto SourceIsVectorPair = [&GetVRegDef, this](const MachineInstr *Conv) {
    if (Conv->getNumOperands() < 2)
      return false;
    const MachineInstr *SrcDef = GetVRegDef(Conv->getOperand(1));
    if (!SrcDef || SrcDef->getNumOperands() == 0)
      return false;
    const MachineOperand &Result = SrcDef->getOperand(0);
    return Result.isReg() && Result.getReg().isVirtual() &&
           MRI->getRegClass(Result.getReg()) == &Hexagon::HvxWRRegClass;
  };

  // Returns the source operand of a conversion that is about to be bypassed.
  // The rewritten instruction reads that register later than the conversion
  // did, and several users of the same conversion may be rewritten, so any
  // kill flag on the register may become stale. Drop them all; the new use is
  // added without a kill state.
  auto TakeConvSource = [this](MachineInstr *Conv) -> MachineOperand & {
    MachineOperand &Src = Conv->getOperand(1);
    if (Src.getReg().isVirtual())
      MRI->clearKillFlags(Src.getReg());
    else
      Src.setIsKill(false);
    return Src;
  };

  // Inserts `Res = NewOpc A, B` in front of MI.
  auto Emit = [&](unsigned NewOpc, const MachineOperand &A, unsigned AFlags,
                  const MachineOperand &B, unsigned BFlags) {
    MachineInstrBuilder MIB =
        BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(NewOpc),
                MI->getOperand(0).getReg())
            .addReg(A.getReg(), AFlags, A.getSubReg())
            .addReg(B.getReg(), BFlags, B.getSubReg());
    LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
    (void)MIB;
    return true;
  };

  const unsigned Def1OP = DefMI1->getOpcode();
  const unsigned Def2OP = DefMI2->getOpcode();
  const bool IsSfConv1 = Def1OP == Hexagon::V6_vconv_sf_qf32;
  const bool IsSfConv2 = Def2OP == Hexagon::V6_vconv_sf_qf32;
  const bool IsHfConv1 = Def1OP == Hexagon::V6_vconv_hf_qf16;
  const bool IsHfConv2 = Def2OP == Hexagon::V6_vconv_hf_qf16;
  const bool IsVmpyQf32Sf = Opc == Hexagon::V6_vmpy_qf32_sf;

  // Case 1: Both reaching defs of MI are qf to sf/hf conversions.
  if ((IsSfConv1 && IsSfConv2) || (IsHfConv1 && IsHfConv2)) {
    // If the reaching defs of the conversions are W register type, we return.
    if (SourceIsVectorPair(DefMI1) || SourceIsVectorPair(DefMI2))
      return false;

    // Resolve the final opcode before touching any operand so that a failed
    // lookup leaves the function unmodified. V6_vmpy_qf32_sf maps directly to
    // its full qf form; everything else goes through the mix form first.
    unsigned short FullTy = NextTy;
    if (!IsVmpyQf32Sf) {
      const auto FullIt = QFPInstMap.find(NextTy);
      if (FullIt == QFPInstMap.end())
        return false;
      FullTy = FullIt->second;
    }

    MachineOperand &Src1 = TakeConvSource(DefMI1);
    MachineOperand &Src2 = TakeConvSource(DefMI2);
    return Emit(FullTy, Src1, 0, Src2, 0);
  }

  // Case 2: Left operand is conversion to sf/hf.
  if (((IsSfConv1 && !IsSfConv2) || (IsHfConv1 && !IsHfConv2)) &&
      !DefMI2->isPHI() && !IsVmpyQf32Sf) {
    if (SourceIsVectorPair(DefMI1))
      return false;

    MachineOperand &Src1 = TakeConvSource(DefMI1);
    // The untouched operand is read at the same program point as before, so
    // its kill state carries over from MI (read after the flags above were
    // cleared, in case both operands name the same register).
    const MachineOperand &Src2 = MI->getOperand(2);
    return Emit(NextTy, Src1, 0, Src2, getKillRegState(Src2.isKill()));
  }

  // Case 3: Right operand is conversion to sf/hf.
  if (((!IsSfConv1 && IsSfConv2) || (!IsHfConv1 && IsHfConv2)) &&
      !DefMI1->isPHI() && !IsVmpyQf32Sf) {
    // The second operand of the original instruction is converted, and the
    // replacement is built with the operands swapped so that the qf value
    // comes first. Caveat: vsub is not commutative w.r.t. operands, so no
    // subtraction may take this path. Test MI's own opcode: for the "_mix"
    // inputs the target opcode is the full qf form, which a check on the
    // target opcode alone would let through.
    if (Opc == Hexagon::V6_vsub_hf || Opc == Hexagon::V6_vsub_sf ||
        Opc == Hexagon::V6_vsub_qf16_mix || Opc == Hexagon::V6_vsub_qf32_mix)
      return false;

    if (SourceIsVectorPair(DefMI2))
      return false;

    MachineOperand &Src2 = TakeConvSource(DefMI2);
    const MachineOperand &Src1 = MI->getOperand(1);
    // Notice the operands are flipped.
    return Emit(NextTy, Src2, 0, Src1, getKillRegState(Src1.isKill()));
  }

  return false;
}
295+
296+
/// Pass entry point: walks every instruction of \p MF and, for each opcode
/// listed in QFPInstMap, asks optimizeQfp to insert a qf/mix replacement. When
/// one is inserted the original instruction is erased.
///
/// The pass does nothing when -disable-qfp-opt is set, when the subtarget does
/// not report HVX v68 ops or packet use, or when the function is skipped.
///
/// \returns true if at least one instruction was replaced.
bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisableQFOptimizer)
    return false;

  HST = &MF.getSubtarget<HexagonSubtarget>();
  // Keep skipFunction last so it is only consulted for eligible subtargets.
  const bool Eligible = HST->useHVXV68Ops() && HST->usePackets() &&
                        !skipFunction(MF.getFunction());
  if (!Eligible)
    return false;

  HII = HST->getInstrInfo();
  MRI = &MF.getRegInfo();

  LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName()
                    << " Optimize intermediate conversions ===\n");

  bool MadeChange = false;
  for (MachineBasicBlock &Block : MF) {
    MachineBasicBlock::iterator Cursor = Block.instr_begin();
    while (Cursor != Block.instr_end()) {
      MachineInstr &Candidate = *Cursor;
      // Step past the candidate first: it may be erased below.
      ++Cursor;

      const unsigned Opc = Candidate.getOpcode();
      if (!QFPInstMap.count(Opc) || Opc == Hexagon::V6_vconv_sf_qf32 ||
          Opc == Hexagon::V6_vconv_hf_qf16)
        continue;

      LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; Candidate.dump());
      if (!optimizeQfp(&Candidate, &Block))
        continue;

      Candidate.eraseFromParent();
      LLVM_DEBUG(dbgs() << "\t....Removing....");
      MadeChange = true;
    }
  }
  return MadeChange;
}

llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ LLVMInitializeHexagonTarget() {
220220
initializeHexagonPeepholePass(PR);
221221
initializeHexagonSplitConst32AndConst64Pass(PR);
222222
initializeHexagonVectorPrintPass(PR);
223+
initializeHexagonQFPOptimizerPass(PR);
223224
}
224225

225226
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -386,6 +387,7 @@ bool HexagonPassConfig::addInstSelector() {
386387
addPass(createHexagonGenInsert());
387388
if (EnableEarlyIf)
388389
addPass(createHexagonEarlyIfConversion());
390+
addPass(createHexagonQFPOptimizer());
389391
}
390392

391393
return false;

0 commit comments

Comments
 (0)