1212// / are encoded using the EVEX prefix and if possible replaces them by their
1313// / corresponding VEX encoding which is usually shorter by 2 bytes.
1414// / EVEX instructions may be encoded via the VEX prefix when the AVX-512
15- // / instruction has a corresponding AVX/AVX2 opcode, when vector length
16- // / accessed by instruction is less than 512 bits and when it does not use
17- // the xmm or the mask registers or xmm/ymm registers with indexes higher than 15.
15+ // / instruction has a corresponding AVX/AVX2 opcode, when vector length
16+ // / accessed by instruction is less than 512 bits and when it does not use
17+ // the zmm or the mask registers or xmm/ymm registers with indexes higher
18+ // than 15.
1819// / The pass applies code reduction on the generated code for AVX-512 instrs.
1920//
2021// ===----------------------------------------------------------------------===//
@@ -39,16 +40,16 @@ using namespace llvm;
3940
4041// Including the generated EVEX2VEX tables.
/// One row of an EVEX->VEX compression table: an EVEX opcode paired with the
/// VEX opcode stored alongside it. Rows are ordered and searched by the EVEX
/// opcode only; the VEX opcode never takes part in a comparison.
struct X86EvexToVexCompressTableEntry {
  uint16_t EvexOpc; ///< EVEX opcode; the sole comparison key.
  uint16_t VexOpc;  ///< VEX opcode paired with EvexOpc.

  /// Entry-vs-entry ordering by EVEX opcode.
  bool operator<(const X86EvexToVexCompressTableEntry &RHS) const {
    return EvexOpc < RHS.EvexOpc;
  }

  /// Entry-vs-opcode ordering, so a sorted table can be binary-searched with
  /// a bare opcode value instead of a temporary entry.
  friend bool operator<(const X86EvexToVexCompressTableEntry &TE,
                        unsigned Opc) {
    return TE.EvexOpc < Opc;
  }
};
5455#include " X86GenEVEX2VEXTables.inc"
@@ -61,16 +62,9 @@ struct X86EvexToVexCompressTableEntry {
6162namespace {
6263
6364class EvexToVexInstPass : public MachineFunctionPass {
64-
65- // / For EVEX instructions that can be encoded using VEX encoding, replace
66- // / them by the VEX encoding in order to reduce size.
67- bool CompressEvexToVexImpl (MachineInstr &MI) const ;
68-
6965public:
7066 static char ID;
71-
72- EvexToVexInstPass () : MachineFunctionPass(ID) { }
73-
67+ EvexToVexInstPass () : MachineFunctionPass(ID) {}
7468 StringRef getPassName () const override { return EVEX2VEX_DESC; }
7569
7670 // / Loop over all of the basic blocks, replacing EVEX instructions
@@ -82,53 +76,23 @@ class EvexToVexInstPass : public MachineFunctionPass {
8276 return MachineFunctionProperties ().set (
8377 MachineFunctionProperties::Property::NoVRegs);
8478 }
85-
86- private:
87- // / Machine instruction info used throughout the class.
88- const X86InstrInfo *TII = nullptr ;
89-
90- const X86Subtarget *ST = nullptr ;
9179};
9280
9381} // end anonymous namespace
9482
9583char EvexToVexInstPass::ID = 0 ;
9684
97- bool EvexToVexInstPass::runOnMachineFunction (MachineFunction &MF) {
98- TII = MF.getSubtarget <X86Subtarget>().getInstrInfo ();
99-
100- ST = &MF.getSubtarget <X86Subtarget>();
101- if (!ST->hasAVX512 ())
102- return false ;
103-
104- bool Changed = false ;
105-
106- // / Go over all basic blocks in function and replace
107- // / EVEX encoded instrs by VEX encoding when possible.
108- for (MachineBasicBlock &MBB : MF) {
109-
110- // Traverse the basic block.
111- for (MachineInstr &MI : MBB)
112- Changed |= CompressEvexToVexImpl (MI);
113- }
114-
115- return Changed;
116- }
117-
11885static bool usesExtendedRegister (const MachineInstr &MI) {
11986 auto isHiRegIdx = [](unsigned Reg) {
12087 // Check for XMM register with indexes between 16 - 31.
12188 if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
12289 return true ;
123-
12490 // Check for YMM register with indexes between 16 - 31.
12591 if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
12692 return true ;
127-
12893 // Check for GPR with indexes between 16 - 31.
12994 if (X86II::isApxExtendedReg (Reg))
13095 return true ;
131-
13296 return false ;
13397 };
13498
@@ -139,32 +103,67 @@ static bool usesExtendedRegister(const MachineInstr &MI) {
139103 continue ;
140104
141105 Register Reg = MO.getReg ();
142-
143- assert (!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31) &&
106+ assert (!X86II::isZMMReg (Reg) &&
144107 " ZMM instructions should not be in the EVEX->VEX tables" );
145-
146108 if (isHiRegIdx (Reg))
147109 return true ;
148110 }
149111
150112 return false ;
151113}
152114
115+ static bool checkVEXInstPredicate (unsigned EvexOpc, const X86Subtarget &ST) {
116+ switch (EvexOpc) {
117+ default :
118+ return true ;
119+ case X86::VCVTNEPS2BF16Z128rm:
120+ case X86::VCVTNEPS2BF16Z128rr:
121+ case X86::VCVTNEPS2BF16Z256rm:
122+ case X86::VCVTNEPS2BF16Z256rr:
123+ return ST.hasAVXNECONVERT ();
124+ case X86::VPDPBUSDSZ128m:
125+ case X86::VPDPBUSDSZ128r:
126+ case X86::VPDPBUSDSZ256m:
127+ case X86::VPDPBUSDSZ256r:
128+ case X86::VPDPBUSDZ128m:
129+ case X86::VPDPBUSDZ128r:
130+ case X86::VPDPBUSDZ256m:
131+ case X86::VPDPBUSDZ256r:
132+ case X86::VPDPWSSDSZ128m:
133+ case X86::VPDPWSSDSZ128r:
134+ case X86::VPDPWSSDSZ256m:
135+ case X86::VPDPWSSDSZ256r:
136+ case X86::VPDPWSSDZ128m:
137+ case X86::VPDPWSSDZ128r:
138+ case X86::VPDPWSSDZ256m:
139+ case X86::VPDPWSSDZ256r:
140+ return ST.hasAVXVNNI ();
141+ case X86::VPMADD52HUQZ128m:
142+ case X86::VPMADD52HUQZ128r:
143+ case X86::VPMADD52HUQZ256m:
144+ case X86::VPMADD52HUQZ256r:
145+ case X86::VPMADD52LUQZ128m:
146+ case X86::VPMADD52LUQZ128r:
147+ case X86::VPMADD52LUQZ256m:
148+ case X86::VPMADD52LUQZ256r:
149+ return ST.hasAVXIFMA ();
150+ }
151+ }
152+
153153// Do any custom cleanup needed to finalize the conversion.
154- static bool performCustomAdjustments (MachineInstr &MI, unsigned NewOpc,
155- const X86Subtarget *ST) {
156- (void )NewOpc;
154+ static bool performCustomAdjustments (MachineInstr &MI, unsigned VexOpc) {
155+ (void )VexOpc;
157156 unsigned Opc = MI.getOpcode ();
158157 switch (Opc) {
159158 case X86::VALIGNDZ128rri:
160159 case X86::VALIGNDZ128rmi:
161160 case X86::VALIGNQZ128rri:
162161 case X86::VALIGNQZ128rmi: {
163- assert ((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
162+ assert ((VexOpc == X86::VPALIGNRrri || VexOpc == X86::VPALIGNRrmi) &&
164163 " Unexpected new opcode!" );
165- unsigned Scale = (Opc == X86::VALIGNQZ128rri ||
166- Opc == X86::VALIGNQZ128rmi) ? 8 : 4 ;
167- MachineOperand &Imm = MI.getOperand (MI.getNumExplicitOperands ()- 1 );
164+ unsigned Scale =
165+ (Opc == X86::VALIGNQZ128rri || Opc == X86::VALIGNQZ128rmi) ? 8 : 4 ;
166+ MachineOperand &Imm = MI.getOperand (MI.getNumExplicitOperands () - 1 );
168167 Imm.setImm (Imm.getImm () * Scale);
169168 break ;
170169 }
@@ -176,10 +175,10 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
176175 case X86::VSHUFI32X4Z256rri:
177176 case X86::VSHUFI64X2Z256rmi:
178177 case X86::VSHUFI64X2Z256rri: {
179- assert ((NewOpc == X86::VPERM2F128rr || NewOpc == X86::VPERM2I128rr ||
180- NewOpc == X86::VPERM2F128rm || NewOpc == X86::VPERM2I128rm) &&
178+ assert ((VexOpc == X86::VPERM2F128rr || VexOpc == X86::VPERM2I128rr ||
179+ VexOpc == X86::VPERM2F128rm || VexOpc == X86::VPERM2I128rm) &&
181180 " Unexpected new opcode!" );
182- MachineOperand &Imm = MI.getOperand (MI.getNumExplicitOperands ()- 1 );
181+ MachineOperand &Imm = MI.getOperand (MI.getNumExplicitOperands () - 1 );
183182 int64_t ImmVal = Imm.getImm ();
184183 // Set bit 5, move bit 1 to bit 4, copy bit 0.
185184 Imm.setImm (0x20 | ((ImmVal & 2 ) << 3 ) | (ImmVal & 1 ));
@@ -212,18 +211,16 @@ static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc,
212211 return true ;
213212}
214213
215-
216214// For EVEX instructions that can be encoded using VEX encoding
217215// replace them by the VEX encoding in order to reduce size.
218- bool EvexToVexInstPass:: CompressEvexToVexImpl (MachineInstr &MI) const {
216+ static bool CompressEvexToVexImpl (MachineInstr &MI, const X86Subtarget &ST) {
219217 // VEX format.
220218 // # of bytes: 0,2,3 1 1 0,1 0,1,2,4 0,1
221219 // [Prefixes] [VEX] OPCODE ModR/M [SIB] [DISP] [IMM]
222220 //
223221 // EVEX format.
224222 // # of bytes: 4 1 1 1 4 / 1 1
225223 // [Prefixes] EVEX Opcode ModR/M [SIB] [Disp32] / [Disp8*N] [Immediate]
226-
227224 const MCInstrDesc &Desc = MI.getDesc ();
228225
229226 // Check for EVEX instructions only.
@@ -241,6 +238,29 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
241238 if (Desc.TSFlags & X86II::EVEX_L2)
242239 return false ;
243240
241+ // Use the VEX.L bit to select the 128 or 256-bit table.
242+ ArrayRef<X86EvexToVexCompressTableEntry> Table =
243+ (Desc.TSFlags & X86II::VEX_L) ? ArrayRef (X86EvexToVex256CompressTable)
244+ : ArrayRef (X86EvexToVex128CompressTable);
245+
246+ unsigned EvexOpc = MI.getOpcode ();
247+ const auto *I = llvm::lower_bound (Table, EvexOpc);
248+ if (I == Table.end () || I->EvexOpc != EvexOpc)
249+ return false ;
250+
251+ if (usesExtendedRegister (MI))
252+ return false ;
253+ if (!checkVEXInstPredicate (EvexOpc, ST))
254+ return false ;
255+ if (!performCustomAdjustments (MI, I->VexOpc ))
256+ return false ;
257+
258+ MI.setDesc (ST.getInstrInfo ()->get (I->VexOpc ));
259+ MI.setAsmPrinterFlag (X86::AC_EVEX_2_VEX);
260+ return true ;
261+ }
262+
263+ bool EvexToVexInstPass::runOnMachineFunction (MachineFunction &MF) {
244264#ifndef NDEBUG
245265 // Make sure the tables are sorted.
246266 static std::atomic<bool > TableChecked (false );
@@ -252,30 +272,21 @@ bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
252272 TableChecked.store (true , std::memory_order_relaxed);
253273 }
254274#endif
255-
256- // Use the VEX.L bit to select the 128 or 256-bit table.
257- ArrayRef<X86EvexToVexCompressTableEntry> Table =
258- (Desc.TSFlags & X86II::VEX_L) ? ArrayRef (X86EvexToVex256CompressTable)
259- : ArrayRef (X86EvexToVex128CompressTable);
260-
261- const auto *I = llvm::lower_bound (Table, MI.getOpcode ());
262- if (I == Table.end () || I->EvexOpcode != MI.getOpcode ())
275+ const X86Subtarget &ST = MF.getSubtarget <X86Subtarget>();
276+ if (!ST.hasAVX512 ())
263277 return false ;
264278
265- unsigned NewOpc = I->VexOpcode ;
266-
267- if (usesExtendedRegister (MI))
268- return false ;
269-
270- if (!CheckVEXInstPredicate (MI, ST))
271- return false ;
279+ bool Changed = false ;
272280
273- if (!performCustomAdjustments (MI, NewOpc, ST))
274- return false ;
281+ // / Go over all basic blocks in function and replace
282+ // / EVEX encoded instrs by VEX encoding when possible.
283+ for (MachineBasicBlock &MBB : MF) {
284+ // Traverse the basic block.
285+ for (MachineInstr &MI : MBB)
286+ Changed |= CompressEvexToVexImpl (MI, ST);
287+ }
275288
276- MI.setDesc (TII->get (NewOpc));
277- MI.setAsmPrinterFlag (X86::AC_EVEX_2_VEX);
278- return true ;
289+ return Changed;
279290}
280291
281292INITIALIZE_PASS (EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false , false )
0 commit comments