Skip to content

Commit 2455a6d

Browse files
fhossein-quicrutkoorBrendon CahoonabhikranSumanth Gundapaneni
committed
Hexagon QFP Optimizer
Patch By: Fateme Hosseini Co-authored-by: Rahul Utkoor <quic_rutkoor@quicinc.com> Co-authored-by: Brendon Cahoon <bcahoon@quicinc.com> Co-authored-by: abhikran <abhikran@codeaurora.org> Co-authored-by: Sumanth Gundapaneni <sgundapa@quicinc.com> Co-authored-by: Ikhlas Ajbar <iajbar@quicinc.com> Co-authored-by: Anirudh Sundar <quic_sanirudh@quicinc.com> Co-authored-by: Yashas Andaluri <quic_yandalur@quicinc.com> Co-authored-by: quic-santdas <quic_santdas@quicinc.com>
1 parent 64c511f commit 2455a6d

File tree

12 files changed

+686
-0
lines changed

12 files changed

+686
-0
lines changed

llvm/lib/Target/Hexagon/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen
5454
HexagonOptAddrMode.cpp
5555
HexagonOptimizeSZextends.cpp
5656
HexagonPeephole.cpp
57+
HexagonQFPOptimizer.cpp
5758
HexagonRDFOpt.cpp
5859
HexagonRegisterInfo.cpp
5960
HexagonSelectionDAGInfo.cpp

llvm/lib/Target/Hexagon/Hexagon.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &);
6767
void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &);
6868
void initializeHexagonVectorPrintPass(PassRegistry &);
6969

70+
void initializeHexagonQFPoptimizerPass(PassRegistry &);
71+
7072
Pass *createHexagonLoopIdiomPass();
7173
Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
7274

@@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass();
112114
FunctionPass *createHexagonVectorPrint();
113115
FunctionPass *createHexagonVExtract();
114116
FunctionPass *createHexagonExpandCondsets();
117+
FunctionPass *createHexagonQFPoptimizer();
115118

116119
} // end namespace llvm;
117120

Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions
2+
// optimizer ------------------===//
3+
//
4+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
//
8+
//===----------------------------------------------------------------------===//
9+
//
10+
// Basic infrastructure for optimizing intermediate conversion instructions
11+
// generated while performing vector floating point operations.
12+
// Currently run at the start of code generation for Hexagon; it cleans
13+
// up redundant conversion instructions and replaces the uses of conversion
14+
// with appropriate machine operand. Liveness is preserved after this pass.
15+
//
16+
// @note: The redundant conversion instructions are not eliminated in this pass.
17+
// In this pass, we are only trying to replace the uses of conversion
18+
// instructions with its appropriate QFP instruction. We are leaving the job to
19+
// Dead instruction Elimination pass to remove redundant conversion
20+
// instructions.
21+
//
22+
// Brief overview of working of this QFP optimizer.
23+
// This version of Hexagon QFP optimizer basically iterates over each
24+
// instruction and checks whether it belongs to the Hexagon floating-point HVX
25+
// arithmetic instruction category(Add, Sub, Mul). And then it finds the unique
26+
// definition for the machine operands corresponding to the instruction.
27+
//
28+
// Example:
29+
// MachineInstruction *MI be the HVX vadd instruction
30+
// MI -> $v0 = V6_vadd_sf $v1, $v2
31+
// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
32+
// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
33+
//
34+
// In the above example, DefMI1 and DefMI2 gives the unique definitions
35+
// corresponding to the operands ($v1 and $v2 respectively) of instruction MI.
36+
//
37+
// If neither definition is a conversion instruction (V6_vconv_sf_qf32,
38+
// V6_vconv_hf_qf16), then it will skip optimizing the current instruction and
39+
// iterates over next instruction.
40+
//
41+
// If one of the definitions is a conversion instruction then our pass will replace
42+
// the arithmetic instruction with its corresponding mix variant.
43+
// In the above example, if $v1 is conversion instruction
44+
// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
45+
// After Transformation:
46+
// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
47+
//
48+
// If both the definitions are conversion instructions then the instruction will
49+
// be replaced with its qf variant
50+
// In the above example, if $v1 and $v2 are conversion instructions
51+
// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
52+
// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
53+
// After Transformation:
54+
// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
55+
// with $v4)
56+
//
57+
// Currently, in this pass, we are not handling the case when the definitions
58+
// are PHI inst.
59+
//
60+
//===----------------------------------------------------------------------===//
61+
#include <unordered_set>
62+
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
63+
64+
#include "Hexagon.h"
65+
#include "HexagonInstrInfo.h"
66+
#include "HexagonSubtarget.h"
67+
#include "llvm/ADT/SmallVector.h"
68+
#include "llvm/ADT/StringRef.h"
69+
#include "llvm/CodeGen/MachineBasicBlock.h"
70+
#include "llvm/CodeGen/MachineFunction.h"
71+
#include "llvm/CodeGen/MachineFunctionPass.h"
72+
#include "llvm/CodeGen/MachineInstr.h"
73+
#include "llvm/CodeGen/MachineOperand.h"
74+
#include "llvm/CodeGen/Passes.h"
75+
#include "llvm/Pass.h"
76+
#include "llvm/Support/CommandLine.h"
77+
#include "llvm/Support/Debug.h"
78+
#include "llvm/Support/raw_ostream.h"
79+
#include <map>
80+
#include <vector>
81+
82+
#define DEBUG_TYPE "hexagon-qfp-optimizer"
83+
84+
using namespace llvm;
85+
86+
// Command-line switch "-disable-qfp-opt". It defaults to false; when set,
// HexagonQFPoptimizer::runOnMachineFunction returns without touching the
// function.
cl::opt<bool> DisableQFOptimizer(
    "disable-qfp-opt",
    cl::desc("Disable optimization of Qfloat operations."),
    cl::init(false));
89+
90+
namespace {
91+
const std::map<unsigned short, unsigned short> QFPInstMap{
92+
{Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},
93+
{Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},
94+
{Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},
95+
{Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},
96+
{Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},
97+
{Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},
98+
{Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},
99+
{Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},
100+
{Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},
101+
{Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
102+
{Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
103+
{Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
104+
{Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
105+
} // namespace
106+
107+
namespace llvm {
108+
109+
FunctionPass *createHexagonQFPoptimizer();
110+
void initializeHexagonQFPoptimizerPass(PassRegistry &);
111+
112+
} // namespace llvm
113+
114+
namespace {
115+
116+
struct HexagonQFPoptimizer : public MachineFunctionPass {
117+
public:
118+
static char ID;
119+
120+
HexagonQFPoptimizer() : MachineFunctionPass(ID) {
121+
for (const auto &entry : QFPInstMap) {
122+
QFPInstSet.insert(entry.first);
123+
QFPInstSet.insert(entry.second);
124+
}
125+
}
126+
127+
bool runOnMachineFunction(MachineFunction &MF) override;
128+
129+
bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
130+
131+
StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
132+
133+
void getAnalysisUsage(AnalysisUsage &AU) const override {
134+
AU.setPreservesCFG();
135+
MachineFunctionPass::getAnalysisUsage(AU);
136+
}
137+
138+
private:
139+
const HexagonSubtarget *HST = nullptr;
140+
const HexagonInstrInfo *HII = nullptr;
141+
const MachineRegisterInfo *MRI = nullptr;
142+
std::unordered_set<unsigned short> QFPInstSet;
143+
};
144+
145+
char HexagonQFPoptimizer::ID = 0;
146+
} // namespace
147+
148+
INITIALIZE_PASS(HexagonQFPoptimizer, "hexagon-qfp-optimizer",
149+
HEXAGON_QFP_OPTIMIZER, false, false)
150+
151+
/// Factory for the QFP optimizer pass; returns a newly allocated
/// HexagonQFPoptimizer as a FunctionPass pointer.
FunctionPass *llvm::createHexagonQFPoptimizer() {
  FunctionPass *QFPOpt = new HexagonQFPoptimizer();
  return QFPOpt;
}
154+
155+
/// Try to fold qf-to-IEEE conversions that feed \p MI into \p MI itself.
///
/// \p MI must be an HVX floating-point arithmetic instruction with one result
/// and two register sources. When one or both sources are defined by
/// V6_vconv_sf_qf32 / V6_vconv_hf_qf16, a replacement instruction (the "mix"
/// or full "qf" variant taken from QFPInstMap) is inserted right before \p MI
/// in \p MBB. It defines the same result register and reads the conversion
/// input(s) directly.
///
/// \param MI  Candidate instruction. It is not erased here; the caller removes
///            it when this function returns true.
/// \param MBB Basic block that contains \p MI.
/// \returns true if a replacement instruction was inserted, false if the
///          function was left untouched.
bool HexagonQFPoptimizer::optimizeQfp(MachineInstr *MI,
                                      MachineBasicBlock *MBB) {
  // QFP arithmetic needs a destination plus two sources.
  if (MI->getNumOperands() < 3)
    return false;

  // Only opcodes that appear as a key in QFPInstMap can be rewritten. This
  // also subsumes the QFPInstSet membership test, since every key is in the
  // set.
  auto It = QFPInstMap.find(MI->getOpcode());
  if (It == QFPInstMap.end())
    return false;
  unsigned short InstTy = It->second;

  // Unique definition of a virtual-register operand, or nullptr when the
  // operand is not a virtual register (getVRegDef is only meaningful there).
  auto getVirtRegDef = [this](const MachineOperand &MO) -> MachineInstr * {
    if (!MO.isReg() || !MO.getReg().isVirtual())
      return nullptr;
    return MRI->getVRegDef(MO.getReg());
  };

  // Reaching definitions of the two sources of MI.
  MachineInstr *DefMI1 = getVirtRegDef(MI->getOperand(1));
  MachineInstr *DefMI2 = getVirtRegDef(MI->getOperand(2));
  if (!DefMI1 || !DefMI2)
    return false;

  LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
             DefMI2->dump());

  // True when the input of conversion Conv is produced by an instruction
  // whose first operand is a virtual register of class HvxWR (vector pair).
  // Such conversions are never folded. Every access is guarded so that an
  // unexpected operand layout results in "false" instead of an assertion.
  auto sourceIsVectorPair = [&](const MachineInstr *Conv) -> bool {
    if (Conv->getNumOperands() < 2)
      return false;
    const MachineInstr *SrcDef = getVirtRegDef(Conv->getOperand(1));
    if (!SrcDef || SrcDef->getNumOperands() == 0)
      return false;
    const MachineOperand &Dst = SrcDef->getOperand(0);
    return Dst.isReg() && Dst.getReg().isVirtual() &&
           MRI->getRegClass(Dst.getReg()) == &Hexagon::HvxWRRegClass;
  };

  // Reads the kill state of Src as a register flag and clears it on Src. The
  // replacement instruction becomes the later user of that register, so the
  // flag has to travel with it.
  auto takeKillFlag = [](MachineOperand &Src) -> unsigned {
    unsigned Flags = getKillRegState(Src.isKill());
    Src.setIsKill(false);
    return Flags;
  };

  MachineOperand &Res = MI->getOperand(0);

  // Inserts "Res = Opc A, B" in front of MI.
  auto emitReplacement = [&](unsigned short Opc, const MachineOperand &A,
                             unsigned AFlags, const MachineOperand &B,
                             unsigned BFlags) {
    MachineInstrBuilder MIB =
        BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(Opc), Res.getReg())
            .addReg(A.getReg(), AFlags, A.getSubReg())
            .addReg(B.getReg(), BFlags, B.getSubReg());
    (void)MIB; // Only read inside LLVM_DEBUG.
    LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
  };

  const unsigned Def1OP = DefMI1->getOpcode();
  const unsigned Def2OP = DefMI2->getOpcode();
  const bool IsMpyQf32Sf = MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf;

  // Case 1: Both reaching defs of MI are qf to sf/hf conversions
  if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
       Def2OP == Hexagon::V6_vconv_sf_qf32) ||
      (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
       Def2OP == Hexagon::V6_vconv_hf_qf16)) {

    // If the reaching defs of DefMI are W register type, we return
    if (sourceIsVectorPair(DefMI1) || sourceIsVectorPair(DefMI2))
      return false;

    // Resolve the final opcode BEFORE touching any kill flag, so that a
    // failed lookup leaves the function completely unmodified.
    // V6_vmpy_qf32_sf maps directly to its qf form; everything else needs a
    // second step (original -> mix -> qf).
    if (!IsMpyQf32Sf) {
      auto QfIt = QFPInstMap.find(InstTy);
      if (QfIt == QFPInstMap.end())
        return false;
      InstTy = QfIt->second;
    }

    // Analyze the use operands of the conversion to get their KILL status
    MachineOperand &Src1 = DefMI1->getOperand(1);
    MachineOperand &Src2 = DefMI2->getOperand(1);
    const unsigned Op0F = takeKillFlag(Src1);
    const unsigned Op1F = takeKillFlag(Src2);

    emitReplacement(InstTy, Src1, Op0F, Src2, Op1F);
    return true;
  }

  // Case 2: Left operand is conversion to sf/hf
  if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
        Def2OP != Hexagon::V6_vconv_sf_qf32) ||
       (Def1OP == Hexagon::V6_vconv_hf_qf16 &&
        Def2OP != Hexagon::V6_vconv_hf_qf16)) &&
      !DefMI2->isPHI() && !IsMpyQf32Sf) {

    if (sourceIsVectorPair(DefMI1))
      return false;

    MachineOperand &Src1 = DefMI1->getOperand(1);
    MachineOperand &Src2 = MI->getOperand(2);
    const unsigned Op0F = takeKillFlag(Src1);
    // MI's own operand keeps its flag; the caller erases MI afterwards.
    const unsigned Op1F = getKillRegState(Src2.isKill());

    emitReplacement(InstTy, Src1, Op0F, Src2, Op1F);
    return true;
  }

  // Case 3: Right operand is conversion to sf/hf
  if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
        Def2OP == Hexagon::V6_vconv_sf_qf32) ||
       (Def1OP != Hexagon::V6_vconv_hf_qf16 &&
        Def2OP == Hexagon::V6_vconv_hf_qf16)) &&
      !DefMI1->isPHI() && !IsMpyQf32Sf) {
    // The second operand of original instruction is converted.
    // In "mix" instructions, "qf" operand is always the first operand.

    // Caveat: vsub is not commutative w.r.t operands.
    if (InstTy == Hexagon::V6_vsub_qf16_mix ||
        InstTy == Hexagon::V6_vsub_qf32_mix)
      return false;

    if (sourceIsVectorPair(DefMI2))
      return false;

    MachineOperand &Src1 = MI->getOperand(1);
    MachineOperand &Src2 = DefMI2->getOperand(1);
    const unsigned Op1F = takeKillFlag(Src2);
    const unsigned Op0F = getKillRegState(Src1.isKill());

    // Notice the operands are flipped: the qf input goes first.
    emitReplacement(InstTy, Src2, Op1F, Src1, Op0F);
    return true;
  }

  return false;
}
304+
305+
/// Pass entry point. Walks all instructions of \p MF and hands every opcode
/// found in QFPInstSet to optimizeQfp(); instructions for which a replacement
/// was built are erased.
///
/// Nothing is done when the pass is disabled on the command line, when the
/// subtarget does not use HVX v68 operations or packets, or when
/// skipFunction() asks to skip the function.
///
/// \returns true if at least one instruction was replaced.
bool HexagonQFPoptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (DisableQFOptimizer)
    return false;

  HST = &MF.getSubtarget<HexagonSubtarget>();
  const bool ShouldRun = HST->useHVXV68Ops() && HST->usePackets() &&
                         !skipFunction(MF.getFunction());
  if (!ShouldRun)
    return false;

  HII = HST->getInstrInfo();
  MRI = &MF.getRegInfo();

  LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName()
                    << " Optimize intermediate conversions ===\n");

  bool Modified = false;
  for (MachineFunction::iterator BlockIt = MF.begin(); BlockIt != MF.end();
       ++BlockIt) {
    MachineBasicBlock *Block = &*BlockIt;
    MachineBasicBlock::iterator Cursor = BlockIt->instr_begin();
    while (Cursor != BlockIt->instr_end()) {
      MachineInstr *Candidate = &*Cursor;
      // Step past the candidate first: it may be erased below.
      ++Cursor;

      const unsigned Opc = Candidate->getOpcode();
      if (!QFPInstSet.count(Opc) || Opc == Hexagon::V6_vconv_sf_qf32 ||
          Opc == Hexagon::V6_vconv_hf_qf16)
        continue;

      LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; Candidate->dump());
      if (!optimizeQfp(Candidate, Block))
        continue;

      // A replacement was inserted in front of the candidate; drop the
      // original instruction.
      Candidate->eraseFromParent();
      LLVM_DEBUG(dbgs() << "\t....Removing....");
      Modified = true;
    }
  }
  return Modified;
}

llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,7 @@ LLVMInitializeHexagonTarget() {
220220
initializeHexagonPeepholePass(PR);
221221
initializeHexagonSplitConst32AndConst64Pass(PR);
222222
initializeHexagonVectorPrintPass(PR);
223+
initializeHexagonQFPoptimizerPass(PR);
223224
}
224225

225226
HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -386,6 +387,7 @@ bool HexagonPassConfig::addInstSelector() {
386387
addPass(createHexagonGenInsert());
387388
if (EnableEarlyIf)
388389
addPass(createHexagonEarlyIfConversion());
390+
addPass(createHexagonQFPoptimizer());
389391
}
390392

391393
return false;

0 commit comments

Comments
 (0)