|
| 1 | +//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions |
| 2 | +// optimizer ------------------===// |
| 3 | +// |
| 4 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | +// See https://llvm.org/LICENSE.txt for license information. |
| 6 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | +// |
| 8 | +//===----------------------------------------------------------------------===// |
| 9 | +// |
| 10 | +// Basic infrastructure for optimizing intermediate conversion instructions |
| 11 | +// generated while performing vector floating point operations. |
| 12 | +// Currently runs at the start of code generation for Hexagon; it cleans |
| 13 | +// up redundant conversion instructions by replacing the uses of a conversion |
| 14 | +// with the appropriate machine operand. Liveness is preserved after this pass. |
| 15 | +// |
| 16 | +// @note: The redundant conversion instructions are not eliminated in this pass. |
| 17 | +// In this pass, we are only trying to replace the uses of conversion |
| 18 | +// instructions with its appropriate QFP instruction. We are leaving the job to |
| 19 | +// Dead instruction Elimination pass to remove redundant conversion |
| 20 | +// instructions. |
| 21 | +// |
| 22 | +// Brief overview of working of this QFP optimizer. |
| 23 | +// This version of the Hexagon QFP optimizer iterates over each instruction |
| 24 | +// and checks whether it belongs to the Hexagon HVX floating-point arithmetic |
| 25 | +// instruction category (Add, Sub, Mul). It then finds the unique definition |
| 26 | +// of each source machine operand of the instruction. |
| 27 | +// |
| 28 | +// Example: |
| 29 | +// Let MachineInstr *MI be the HVX vadd instruction |
| 30 | +// MI -> $v0 = V6_vadd_sf $v1, $v2 |
| 31 | +// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg()); |
| 32 | +// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg()); |
| 33 | +// |
| 34 | +// In the above example, DefMI1 and DefMI2 give the unique definitions |
| 35 | +// corresponding to the operands ($v1 and $v2 respectively) of instruction MI. |
| 36 | +// |
| 37 | +// If neither definition is a conversion instruction (V6_vconv_sf_qf32, |
| 38 | +// V6_vconv_hf_qf16), the pass skips optimizing the current instruction and |
| 39 | +// moves on to the next instruction. |
| 40 | +// |
| 41 | +// If one of the definitions is a conversion instruction, the pass replaces |
| 42 | +// the arithmetic instruction with its corresponding mix variant. |
| 43 | +// In the above example, if $v1 is defined by a conversion instruction |
| 44 | +// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3 |
| 45 | +// After Transformation: |
| 46 | +// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3) |
| 47 | +// |
| 48 | +// If both definitions are conversion instructions, the instruction is |
| 49 | +// replaced with its qf variant. |
| 50 | +// In the above example, if $v1 and $v2 are both defined by conversion instructions |
| 51 | +// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3 |
| 52 | +// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4 |
| 53 | +// After Transformation: |
| 54 | +// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced |
| 55 | +// with $v4) |
| 56 | +// |
| 57 | +// Currently, this pass does not handle the case where the definitions |
| 58 | +// are PHI instructions. |
| 59 | +// |
| 60 | +//===----------------------------------------------------------------------===// |
| 61 | +#include <unordered_set> |
| 62 | +#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass" |
| 63 | + |
| 64 | +#include "Hexagon.h" |
| 65 | +#include "HexagonInstrInfo.h" |
| 66 | +#include "HexagonSubtarget.h" |
| 67 | +#include "llvm/ADT/SmallVector.h" |
| 68 | +#include "llvm/ADT/StringRef.h" |
| 69 | +#include "llvm/CodeGen/MachineBasicBlock.h" |
| 70 | +#include "llvm/CodeGen/MachineFunction.h" |
| 71 | +#include "llvm/CodeGen/MachineFunctionPass.h" |
| 72 | +#include "llvm/CodeGen/MachineInstr.h" |
| 73 | +#include "llvm/CodeGen/MachineOperand.h" |
| 74 | +#include "llvm/CodeGen/Passes.h" |
| 75 | +#include "llvm/Pass.h" |
| 76 | +#include "llvm/Support/CommandLine.h" |
| 77 | +#include "llvm/Support/Debug.h" |
| 78 | +#include "llvm/Support/raw_ostream.h" |
| 79 | +#include <map> |
| 80 | +#include <vector> |
| 81 | + |
| 82 | +#define DEBUG_TYPE "hexagon-qfp-optimizer" |
| 83 | + |
| 84 | +using namespace llvm; |
| 85 | + |
| 86 | +cl::opt<bool> |
| 87 | + DisableQFOptimizer("disable-qfp-opt", cl::init(false), |
| 88 | + cl::desc("Disable optimization of Qfloat operations.")); |
| 89 | + |
| 90 | +namespace { |
| 91 | +const std::map<unsigned short, unsigned short> QFPInstMap{ |
| 92 | + {Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix}, |
| 93 | + {Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16}, |
| 94 | + {Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix}, |
| 95 | + {Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32}, |
| 96 | + {Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix}, |
| 97 | + {Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16}, |
| 98 | + {Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix}, |
| 99 | + {Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32}, |
| 100 | + {Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf}, |
| 101 | + {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16}, |
| 102 | + {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf}, |
| 103 | + {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16}, |
| 104 | + {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}}; |
| 105 | +} // namespace |
| 106 | + |
| 107 | +namespace llvm { |
| 108 | + |
| 109 | +FunctionPass *createHexagonQFPOptimizer(); |
| 110 | +void initializeHexagonQFPOptimizerPass(PassRegistry &); |
| 111 | + |
| 112 | +} // namespace llvm |
| 113 | + |
| 114 | +namespace { |
| 115 | + |
| 116 | +struct HexagonQFPOptimizer : public MachineFunctionPass { |
| 117 | +public: |
| 118 | + static char ID; |
| 119 | + |
| 120 | + HexagonQFPOptimizer() : MachineFunctionPass(ID) {} |
| 121 | + |
| 122 | + bool runOnMachineFunction(MachineFunction &MF) override; |
| 123 | + |
| 124 | + bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB); |
| 125 | + |
| 126 | + StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; } |
| 127 | + |
| 128 | + void getAnalysisUsage(AnalysisUsage &AU) const override { |
| 129 | + AU.setPreservesCFG(); |
| 130 | + MachineFunctionPass::getAnalysisUsage(AU); |
| 131 | + } |
| 132 | + |
| 133 | +private: |
| 134 | + const HexagonSubtarget *HST = nullptr; |
| 135 | + const HexagonInstrInfo *HII = nullptr; |
| 136 | + const MachineRegisterInfo *MRI = nullptr; |
| 137 | +}; |
| 138 | + |
| 139 | +char HexagonQFPOptimizer::ID = 0; |
| 140 | +} // namespace |
| 141 | + |
| 142 | +INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer", |
| 143 | + HEXAGON_QFP_OPTIMIZER, false, false) |
| 144 | + |
| 145 | +FunctionPass *llvm::createHexagonQFPOptimizer() { |
| 146 | + return new HexagonQFPOptimizer(); |
| 147 | +} |
| 148 | + |
| 149 | +bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI, |
| 150 | + MachineBasicBlock *MBB) { |
| 151 | + |
| 152 | + // Early exit: |
| 153 | + // - if instruction is invalid or has too few operands (QFP ops need 2 sources |
| 154 | + // + 1 dest), |
| 155 | + // - or does not have a transformation mapping. |
| 156 | + if (MI->getNumOperands() < 3) |
| 157 | + return false; |
| 158 | + auto It = QFPInstMap.find(MI->getOpcode()); |
| 159 | + if (It == QFPInstMap.end()) |
| 160 | + return false; |
| 161 | + unsigned short InstTy = It->second; |
| 162 | + |
| 163 | + unsigned Op0F = 0; |
| 164 | + unsigned Op1F = 0; |
| 165 | + // Get the reaching defs of MI, DefMI1 and DefMI2 |
| 166 | + MachineInstr *DefMI1 = nullptr; |
| 167 | + MachineInstr *DefMI2 = nullptr; |
| 168 | + |
| 169 | + if (MI->getOperand(1).isReg()) |
| 170 | + DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg()); |
| 171 | + if (MI->getOperand(2).isReg()) |
| 172 | + DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg()); |
| 173 | + if (!DefMI1 || !DefMI2) |
| 174 | + return false; |
| 175 | + |
| 176 | + MachineOperand &Res = MI->getOperand(0); |
| 177 | + MachineInstr *Inst1 = nullptr; |
| 178 | + MachineInstr *Inst2 = nullptr; |
| 179 | + LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump(); |
| 180 | + DefMI2->dump()); |
| 181 | + |
| 182 | + // Get the reaching defs of DefMI |
| 183 | + if (DefMI1->getNumOperands() > 1 && DefMI1->getOperand(1).isReg() && |
| 184 | + DefMI1->getOperand(1).getReg().isVirtual()) |
| 185 | + Inst1 = MRI->getVRegDef(DefMI1->getOperand(1).getReg()); |
| 186 | + |
| 187 | + if (DefMI2->getNumOperands() > 1 && DefMI2->getOperand(1).isReg() && |
| 188 | + DefMI2->getOperand(1).getReg().isVirtual()) |
| 189 | + Inst2 = MRI->getVRegDef(DefMI2->getOperand(1).getReg()); |
| 190 | + |
| 191 | + unsigned Def1OP = DefMI1->getOpcode(); |
| 192 | + unsigned Def2OP = DefMI2->getOpcode(); |
| 193 | + |
| 194 | + MachineInstrBuilder MIB; |
| 195 | + // Case 1: Both reaching defs of MI are qf to sf/hf conversions |
| 196 | + if ((Def1OP == Hexagon::V6_vconv_sf_qf32 && |
| 197 | + Def2OP == Hexagon::V6_vconv_sf_qf32) || |
| 198 | + (Def1OP == Hexagon::V6_vconv_hf_qf16 && |
| 199 | + Def2OP == Hexagon::V6_vconv_hf_qf16)) { |
| 200 | + |
| 201 | + // If the reaching defs of DefMI are W register type, we return |
| 202 | + if ((Inst1 && Inst1->getNumOperands() > 0 && Inst1->getOperand(0).isReg() && |
| 203 | + MRI->getRegClass(Inst1->getOperand(0).getReg()) == |
| 204 | + &Hexagon::HvxWRRegClass) || |
| 205 | + (Inst2 && Inst2->getNumOperands() > 0 && Inst2->getOperand(0).isReg() && |
| 206 | + MRI->getRegClass(Inst2->getOperand(0).getReg()) == |
| 207 | + &Hexagon::HvxWRRegClass)) |
| 208 | + return false; |
| 209 | + |
| 210 | + // Analyze the use operands of the conversion to get their KILL status |
| 211 | + MachineOperand &Src1 = DefMI1->getOperand(1); |
| 212 | + MachineOperand &Src2 = DefMI2->getOperand(1); |
| 213 | + |
| 214 | + Op0F = getKillRegState(Src1.isKill()); |
| 215 | + Src1.setIsKill(false); |
| 216 | + |
| 217 | + Op1F = getKillRegState(Src2.isKill()); |
| 218 | + Src2.setIsKill(false); |
| 219 | + |
| 220 | + if (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf) { |
| 221 | + auto OuterIt = QFPInstMap.find(MI->getOpcode()); |
| 222 | + if (OuterIt == QFPInstMap.end()) |
| 223 | + return false; |
| 224 | + auto InnerIt = QFPInstMap.find(OuterIt->second); |
| 225 | + if (InnerIt == QFPInstMap.end()) |
| 226 | + return false; |
| 227 | + InstTy = InnerIt->second; |
| 228 | + } |
| 229 | + |
| 230 | + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) |
| 231 | + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()) |
| 232 | + .addReg(Src2.getReg(), Op1F, Src2.getSubReg()); |
| 233 | + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); |
| 234 | + return true; |
| 235 | + |
| 236 | + // Case 2: Left operand is conversion to sf/hf |
| 237 | + } else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 && |
| 238 | + Def2OP != Hexagon::V6_vconv_sf_qf32) || |
| 239 | + (Def1OP == Hexagon::V6_vconv_hf_qf16 && |
| 240 | + Def2OP != Hexagon::V6_vconv_hf_qf16)) && |
| 241 | + !DefMI2->isPHI() && |
| 242 | + (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) { |
| 243 | + |
| 244 | + if (Inst1 && MRI->getRegClass(Inst1->getOperand(0).getReg()) == |
| 245 | + &Hexagon::HvxWRRegClass) |
| 246 | + return false; |
| 247 | + |
| 248 | + MachineOperand &Src1 = DefMI1->getOperand(1); |
| 249 | + MachineOperand &Src2 = MI->getOperand(2); |
| 250 | + |
| 251 | + Op0F = getKillRegState(Src1.isKill()); |
| 252 | + Src1.setIsKill(false); |
| 253 | + Op1F = getKillRegState(Src2.isKill()); |
| 254 | + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) |
| 255 | + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()) |
| 256 | + .addReg(Src2.getReg(), Op1F, Src2.getSubReg()); |
| 257 | + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); |
| 258 | + return true; |
| 259 | + |
| 260 | + // Case 2: Left operand is conversion to sf/hf |
| 261 | + } else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 && |
| 262 | + Def2OP == Hexagon::V6_vconv_sf_qf32) || |
| 263 | + (Def1OP != Hexagon::V6_vconv_hf_qf16 && |
| 264 | + Def2OP == Hexagon::V6_vconv_hf_qf16)) && |
| 265 | + !DefMI1->isPHI() && |
| 266 | + (MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) { |
| 267 | + // The second operand of original instruction is converted. |
| 268 | + // In "mix" instructions, "qf" operand is always the first operand. |
| 269 | + |
| 270 | + // Caveat: vsub is not commutative w.r.t operands. |
| 271 | + if (InstTy == Hexagon::V6_vsub_qf16_mix || |
| 272 | + InstTy == Hexagon::V6_vsub_qf32_mix) |
| 273 | + return false; |
| 274 | + |
| 275 | + if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) == |
| 276 | + &Hexagon::HvxWRRegClass) |
| 277 | + return false; |
| 278 | + |
| 279 | + MachineOperand &Src1 = MI->getOperand(1); |
| 280 | + MachineOperand &Src2 = DefMI2->getOperand(1); |
| 281 | + |
| 282 | + Op1F = getKillRegState(Src2.isKill()); |
| 283 | + Src2.setIsKill(false); |
| 284 | + Op0F = getKillRegState(Src1.isKill()); |
| 285 | + MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg()) |
| 286 | + .addReg(Src2.getReg(), Op1F, |
| 287 | + Src2.getSubReg()) // Notice the operands are flipped. |
| 288 | + .addReg(Src1.getReg(), Op0F, Src1.getSubReg()); |
| 289 | + LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump()); |
| 290 | + return true; |
| 291 | + } |
| 292 | + |
| 293 | + return false; |
| 294 | +} |
| 295 | + |
| 296 | +bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) { |
| 297 | + |
| 298 | + bool Changed = false; |
| 299 | + |
| 300 | + if (DisableQFOptimizer) |
| 301 | + return Changed; |
| 302 | + |
| 303 | + HST = &MF.getSubtarget<HexagonSubtarget>(); |
| 304 | + if (!HST->useHVXV68Ops() || !HST->usePackets() || |
| 305 | + skipFunction(MF.getFunction())) |
| 306 | + return false; |
| 307 | + HII = HST->getInstrInfo(); |
| 308 | + MRI = &MF.getRegInfo(); |
| 309 | + |
| 310 | + MachineFunction::iterator MBBI = MF.begin(); |
| 311 | + LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimzer Pass for : " << MF.getName() |
| 312 | + << " Optimize intermediate conversions ===\n"); |
| 313 | + while (MBBI != MF.end()) { |
| 314 | + MachineBasicBlock *MBB = &*MBBI; |
| 315 | + MachineBasicBlock::iterator MII = MBBI->instr_begin(); |
| 316 | + while (MII != MBBI->instr_end()) { |
| 317 | + MachineInstr *MI = &*MII; |
| 318 | + ++MII; // As MI might be removed. |
| 319 | + |
| 320 | + if (QFPInstMap.count(MI->getOpcode()) && |
| 321 | + MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 && |
| 322 | + MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) { |
| 323 | + LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump()); |
| 324 | + if (optimizeQfp(MI, MBB)) { |
| 325 | + MI->eraseFromParent(); |
| 326 | + LLVM_DEBUG(dbgs() << "\t....Removing...."); |
| 327 | + Changed = true; |
| 328 | + } |
| 329 | + } |
| 330 | + } |
| 331 | + ++MBBI; |
| 332 | + } |
| 333 | + return Changed; |
| 334 | +} |
0 commit comments