@@ -61,6 +61,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
6161 }
6262
6363private:
64+ bool tryToReduceVL (MachineInstr &MI) const ;
6465 bool convertToVLMAX (MachineInstr &MI) const ;
6566 bool convertToWholeRegister (MachineInstr &MI) const ;
6667 bool convertToUnmasked (MachineInstr &MI) const ;
@@ -81,6 +82,96 @@ char RISCVVectorPeephole::ID = 0;
8182INITIALIZE_PASS (RISCVVectorPeephole, DEBUG_TYPE, " RISC-V Fold Masks" , false ,
8283 false )
8384
85+ // / Given two VL operands, do we know that LHS <= RHS?
86+ static bool isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
87+ if (LHS.isReg () && RHS.isReg () && LHS.getReg ().isVirtual () &&
88+ LHS.getReg () == RHS.getReg ())
89+ return true ;
90+ if (RHS.isImm () && RHS.getImm () == RISCV::VLMaxSentinel)
91+ return true ;
92+ if (LHS.isImm () && LHS.getImm () == RISCV::VLMaxSentinel)
93+ return false ;
94+ if (!LHS.isImm () || !RHS.isImm ())
95+ return false ;
96+ return LHS.getImm () <= RHS.getImm ();
97+ }
98+
99+ static unsigned getSEWLMULRatio (const MachineInstr &MI) {
100+ RISCVII::VLMUL LMUL = RISCVII::getLMul (MI.getDesc ().TSFlags );
101+ unsigned Log2SEW = MI.getOperand (RISCVII::getSEWOpNum (MI.getDesc ())).getImm ();
102+ return RISCVVType::getSEWLMULRatio (1 << Log2SEW, LMUL);
103+ }
104+
105+ // Attempt to reduce the VL of an instruction whose sole use is feeding a
106+ // instruction with a narrower VL. This currently works backwards from the
107+ // user instruction (which might have a smaller VL).
108+ bool RISCVVectorPeephole::tryToReduceVL (MachineInstr &MI) const {
109+ // Note that the goal here is a bit multifaceted.
110+ // 1) For store's reducing the VL of the value being stored may help to
111+ // reduce VL toggles. This is somewhat of an artifact of the fact we
112+ // promote arithmetic instructions but VL predicate stores.
113+ // 2) For vmv.v.v reducing VL eagerly on the source instruction allows us
114+ // to share code with the foldVMV_V_V transform below.
115+ //
116+ // Note that to the best of our knowledge, reducing VL is generally not
117+ // a significant win on real hardware unless we can also reduce LMUL which
118+ // this code doesn't try to do.
119+ //
120+ // TODO: We can handle a bunch more instructions here, and probably
121+ // recurse backwards through operands too.
122+ unsigned SrcIdx = 0 ;
123+ switch (RISCV::getRVVMCOpcode (MI.getOpcode ())) {
124+ default :
125+ return false ;
126+ case RISCV::VSE8_V:
127+ case RISCV::VSE16_V:
128+ case RISCV::VSE32_V:
129+ case RISCV::VSE64_V:
130+ break ;
131+ case RISCV::VMV_V_V:
132+ SrcIdx = 2 ;
133+ break ;
134+ }
135+
136+ MachineOperand &VL = MI.getOperand (RISCVII::getVLOpNum (MI.getDesc ()));
137+ if (VL.isImm () && VL.getImm () == RISCV::VLMaxSentinel)
138+ return false ;
139+
140+ Register SrcReg = MI.getOperand (SrcIdx).getReg ();
141+ // Note: one *use*, not one *user*.
142+ if (!MRI->hasOneUse (SrcReg))
143+ return false ;
144+
145+ MachineInstr *Src = MRI->getVRegDef (SrcReg);
146+ if (!Src || Src->hasUnmodeledSideEffects () ||
147+ Src->getParent () != MI.getParent () || Src->getNumDefs () != 1 ||
148+ !RISCVII::hasVLOp (Src->getDesc ().TSFlags ) ||
149+ !RISCVII::hasSEWOp (Src->getDesc ().TSFlags ))
150+ return false ;
151+
152+ // Src needs to have the same VLMAX as MI
153+ if (getSEWLMULRatio (MI) != getSEWLMULRatio (*Src))
154+ return false ;
155+
156+ bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult (
157+ TII->get (RISCV::getRVVMCOpcode (Src->getOpcode ())).TSFlags );
158+ if (ActiveElementsAffectResult || Src->mayRaiseFPException ())
159+ return false ;
160+
161+ MachineOperand &SrcVL = Src->getOperand (RISCVII::getVLOpNum (Src->getDesc ()));
162+ if (VL.isIdenticalTo (SrcVL) || !isVLKnownLE (VL, SrcVL))
163+ return false ;
164+
165+ if (VL.isImm ())
166+ SrcVL.ChangeToImmediate (VL.getImm ());
167+ else if (VL.isReg ())
168+ SrcVL.ChangeToRegister (VL.getReg (), false );
169+
170+ // TODO: For instructions with a passthru, we could clear the passthru
171+ // and tail policy since we've just proven the tail is not demanded.
172+ return true ;
173+ }
174+
84175// / Check if an operand is an immediate or a materialized ADDI $x0, imm.
85176std::optional<unsigned >
86177RISCVVectorPeephole::getConstant (const MachineOperand &VL) const {
@@ -325,22 +416,6 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
325416 return true ;
326417}
327418
328- // / Given two VL operands, returns the one known to be the smallest or nullptr
329- // / if unknown.
330- static const MachineOperand *getKnownMinVL (const MachineOperand *LHS,
331- const MachineOperand *RHS) {
332- if (LHS->isReg () && RHS->isReg () && LHS->getReg ().isVirtual () &&
333- LHS->getReg () == RHS->getReg ())
334- return LHS;
335- if (LHS->isImm () && LHS->getImm () == RISCV::VLMaxSentinel)
336- return RHS;
337- if (RHS->isImm () && RHS->getImm () == RISCV::VLMaxSentinel)
338- return LHS;
339- if (!LHS->isImm () || !RHS->isImm ())
340- return nullptr ;
341- return LHS->getImm () <= RHS->getImm () ? LHS : RHS;
342- }
343-
344419// / Check if it's safe to move From down to To, checking that no physical
345420// / registers are clobbered.
346421static bool isSafeToMove (const MachineInstr &From, const MachineInstr &To) {
@@ -362,21 +437,16 @@ static bool isSafeToMove(const MachineInstr &From, const MachineInstr &To) {
362437 return From.isSafeToMove (SawStore);
363438}
364439
365- static unsigned getSEWLMULRatio (const MachineInstr &MI) {
366- RISCVII::VLMUL LMUL = RISCVII::getLMul (MI.getDesc ().TSFlags );
367- unsigned Log2SEW = MI.getOperand (RISCVII::getSEWOpNum (MI.getDesc ())).getImm ();
368- return RISCVVType::getSEWLMULRatio (1 << Log2SEW, LMUL);
369- }
370-
371440// / If a PseudoVMV_V_V is the only user of its input, fold its passthru and VL
372441// / into it.
373442// /
374443// / %x = PseudoVADD_V_V_M1 %passthru, %a, %b, %vl1, sew, policy
375444// / %y = PseudoVMV_V_V_M1 %passthru, %x, %vl2, sew, policy
445+ // / (where %vl1 <= %vl2, see related tryToReduceVL)
376446// /
377447// / ->
378448// /
379- // / %y = PseudoVADD_V_V_M1 %passthru, %a, %b, min( vl1, vl2) , sew, policy
449+ // / %y = PseudoVADD_V_V_M1 %passthru, %a, %b, vl1, sew, policy
380450bool RISCVVectorPeephole::foldVMV_V_V (MachineInstr &MI) {
381451 if (RISCV::getRVVMCOpcode (MI.getOpcode ()) != RISCV::VMV_V_V)
382452 return false ;
@@ -404,33 +474,16 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
404474 SrcPassthru.getReg () != Passthru.getReg ())
405475 return false ;
406476
407- // Because Src and MI have the same passthru, we can use either AVL as long as
408- // it's the smaller of the two.
409- //
410- // (src pt, ..., vl=5) x x x x x|. . .
411- // (vmv.v.v pt, src, vl=3) x x x|. . . . .
412- // ->
413- // (src pt, ..., vl=3) x x x|. . . . .
414- //
415- // (src pt, ..., vl=3) x x x|. . . . .
416- // (vmv.v.v pt, src, vl=6) x x x . . .|. .
417- // ->
418- // (src pt, ..., vl=3) x x x|. . . . .
477+ // Src's VL will already have been reduced to MI's VL where legal (see
478+ // tryToReduceVL), so there is no need to shrink it here. However, Src's VL
479+ // may still be smaller than MI's VL, which is fine if it is known to be LE.
419480 MachineOperand &SrcVL = Src->getOperand (RISCVII::getVLOpNum (Src->getDesc ()));
420- const MachineOperand *MinVL = getKnownMinVL (&MI.getOperand (3 ), &SrcVL);
421- if (!MinVL)
422- return false ;
423-
424- bool VLChanged = !MinVL->isIdenticalTo (SrcVL);
425- bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult (
426- TII->get (RISCV::getRVVMCOpcode (Src->getOpcode ())).TSFlags );
427-
428- if (VLChanged && (ActiveElementsAffectResult || Src->mayRaiseFPException ()))
481+ if (!isVLKnownLE (SrcVL, MI.getOperand (3 )))
429482 return false ;
430483
431484 // If Src ends up using MI's passthru/VL, move it so it can access it.
432485 // TODO: We don't need to do this if they already dominate Src.
433- if (!SrcVL. isIdenticalTo (*MinVL) || ! SrcPassthru.isIdenticalTo (Passthru)) {
486+ if (!SrcPassthru.isIdenticalTo (Passthru)) {
434487 if (!isSafeToMove (*Src, MI))
435488 return false ;
436489 Src->moveBefore (&MI);
@@ -445,11 +498,6 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
445498 *Src->getParent ()->getParent ()));
446499 }
447500
448- if (MinVL->isImm ())
449- SrcVL.ChangeToImmediate (MinVL->getImm ());
450- else if (MinVL->isReg ())
451- SrcVL.ChangeToRegister (MinVL->getReg (), false );
452-
453501 // Use a conservative tu,mu policy, RISCVInsertVSETVLI will relax it if
454502 // passthru is undef.
455503 Src->getOperand (RISCVII::getVecPolicyOpNum (Src->getDesc ()))
@@ -498,6 +546,7 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
498546 for (MachineBasicBlock &MBB : MF) {
499547 for (MachineInstr &MI : make_early_inc_range (MBB)) {
500548 Changed |= convertToVLMAX (MI);
549+ Changed |= tryToReduceVL (MI);
501550 Changed |= convertToUnmasked (MI);
502551 Changed |= convertToWholeRegister (MI);
503552 Changed |= convertVMergeToVMv (MI);
0 commit comments