@@ -65,6 +65,7 @@ class RISCVVectorPeephole : public MachineFunctionPass {
6565 bool convertToWholeRegister (MachineInstr &MI) const ;
6666 bool convertToUnmasked (MachineInstr &MI) const ;
6767 bool convertVMergeToVMv (MachineInstr &MI) const ;
68+ bool foldVMV_V_V (MachineInstr &MI);
6869
6970 bool isAllOnesMask (const MachineInstr *MaskDef) const ;
7071 std::optional<unsigned > getConstant (const MachineOperand &VL) const ;
@@ -324,6 +325,143 @@ bool RISCVVectorPeephole::convertToUnmasked(MachineInstr &MI) const {
324325 return true ;
325326}
326327
328+ // / Given two VL operands, returns the one known to be the smallest or nullptr
329+ // / if unknown.
330+ static const MachineOperand *getKnownMinVL (const MachineOperand *LHS,
331+ const MachineOperand *RHS) {
332+ if (LHS->isReg () && RHS->isReg () && LHS->getReg ().isVirtual () &&
333+ LHS->getReg () == RHS->getReg ())
334+ return LHS;
335+ if (LHS->isImm () && LHS->getImm () == RISCV::VLMaxSentinel)
336+ return RHS;
337+ if (RHS->isImm () && RHS->getImm () == RISCV::VLMaxSentinel)
338+ return LHS;
339+ if (!LHS->isImm () || !RHS->isImm ())
340+ return nullptr ;
341+ return LHS->getImm () <= RHS->getImm () ? LHS : RHS;
342+ }
343+
344+ // / Check if it's safe to move From down to To, checking that no physical
345+ // / registers are clobbered.
346+ static bool isSafeToMove (const MachineInstr &From, const MachineInstr &To) {
347+ assert (From.getParent () == To.getParent () && !From.hasImplicitDef ());
348+ SmallVector<Register> PhysUses;
349+ for (const MachineOperand &MO : From.all_uses ())
350+ if (MO.getReg ().isPhysical ())
351+ PhysUses.push_back (MO.getReg ());
352+ bool SawStore = false ;
353+ for (auto II = From.getIterator (); II != To.getIterator (); II++) {
354+ for (Register PhysReg : PhysUses)
355+ if (II->definesRegister (PhysReg, nullptr ))
356+ return false ;
357+ if (II->mayStore ()) {
358+ SawStore = true ;
359+ break ;
360+ }
361+ }
362+ return From.isSafeToMove (SawStore);
363+ }
364+
365+ static unsigned getSEWLMULRatio (const MachineInstr &MI) {
366+ RISCVII::VLMUL LMUL = RISCVII::getLMul (MI.getDesc ().TSFlags );
367+ unsigned Log2SEW = MI.getOperand (RISCVII::getSEWOpNum (MI.getDesc ())).getImm ();
368+ return RISCVVType::getSEWLMULRatio (1 << Log2SEW, LMUL);
369+ }
370+
371+ // / If a PseudoVMV_V_V is the only user of its input, fold its passthru and VL
372+ // / into it.
373+ // /
374+ // / %x = PseudoVADD_V_V_M1 %passthru, %a, %b, %vl1, sew, policy
375+ // / %y = PseudoVMV_V_V_M1 %passthru, %x, %vl2, sew, policy
376+ // /
377+ // / ->
378+ // /
379+ // / %y = PseudoVADD_V_V_M1 %passthru, %a, %b, min(vl1, vl2), sew, policy
380+ bool RISCVVectorPeephole::foldVMV_V_V (MachineInstr &MI) {
381+ if (RISCV::getRVVMCOpcode (MI.getOpcode ()) != RISCV::VMV_V_V)
382+ return false ;
383+
384+ MachineOperand &Passthru = MI.getOperand (1 );
385+
386+ if (!MRI->hasOneUse (MI.getOperand (2 ).getReg ()))
387+ return false ;
388+
389+ MachineInstr *Src = MRI->getVRegDef (MI.getOperand (2 ).getReg ());
390+ if (!Src || Src->hasUnmodeledSideEffects () ||
391+ Src->getParent () != MI.getParent () || Src->getNumDefs () != 1 ||
392+ !RISCVII::isFirstDefTiedToFirstUse (Src->getDesc ()) ||
393+ !RISCVII::hasVLOp (Src->getDesc ().TSFlags ) ||
394+ !RISCVII::hasVecPolicyOp (Src->getDesc ().TSFlags ))
395+ return false ;
396+
397+ // Src needs to have the same VLMAX as MI
398+ if (getSEWLMULRatio (MI) != getSEWLMULRatio (*Src))
399+ return false ;
400+
401+ // Src needs to have the same passthru as VMV_V_V
402+ MachineOperand &SrcPassthru = Src->getOperand (1 );
403+ if (SrcPassthru.getReg () != RISCV::NoRegister &&
404+ SrcPassthru.getReg () != Passthru.getReg ())
405+ return false ;
406+
407+ // Because Src and MI have the same passthru, we can use either AVL as long as
408+ // it's the smaller of the two.
409+ //
410+ // (src pt, ..., vl=5) x x x x x|. . .
411+ // (vmv.v.v pt, src, vl=3) x x x|. . . . .
412+ // ->
413+ // (src pt, ..., vl=3) x x x|. . . . .
414+ //
415+ // (src pt, ..., vl=3) x x x|. . . . .
416+ // (vmv.v.v pt, src, vl=6) x x x . . .|. .
417+ // ->
418+ // (src pt, ..., vl=3) x x x|. . . . .
419+ MachineOperand &SrcVL = Src->getOperand (RISCVII::getVLOpNum (Src->getDesc ()));
420+ const MachineOperand *MinVL = getKnownMinVL (&MI.getOperand (3 ), &SrcVL);
421+ if (!MinVL)
422+ return false ;
423+
424+ bool VLChanged = !MinVL->isIdenticalTo (SrcVL);
425+ bool ActiveElementsAffectResult = RISCVII::activeElementsAffectResult (
426+ TII->get (RISCV::getRVVMCOpcode (Src->getOpcode ())).TSFlags );
427+
428+ if (VLChanged && (ActiveElementsAffectResult || Src->mayRaiseFPException ()))
429+ return false ;
430+
431+ // If Src ends up using MI's passthru/VL, move it so it can access it.
432+ // TODO: We don't need to do this if they already dominate Src.
433+ if (!SrcVL.isIdenticalTo (*MinVL) || !SrcPassthru.isIdenticalTo (Passthru)) {
434+ if (!isSafeToMove (*Src, MI))
435+ return false ;
436+ Src->moveBefore (&MI);
437+ }
438+
439+ if (SrcPassthru.getReg () != Passthru.getReg ()) {
440+ SrcPassthru.setReg (Passthru.getReg ());
441+ // If Src is masked then its passthru needs to be in VRNoV0.
442+ if (Passthru.getReg () != RISCV::NoRegister)
443+ MRI->constrainRegClass (Passthru.getReg (),
444+ TII->getRegClass (Src->getDesc (), 1 , TRI,
445+ *Src->getParent ()->getParent ()));
446+ }
447+
448+ if (MinVL->isImm ())
449+ SrcVL.ChangeToImmediate (MinVL->getImm ());
450+ else if (MinVL->isReg ())
451+ SrcVL.ChangeToRegister (MinVL->getReg (), false );
452+
453+ // Use a conservative tu,mu policy, RISCVInsertVSETVLI will relax it if
454+ // passthru is undef.
455+ Src->getOperand (RISCVII::getVecPolicyOpNum (Src->getDesc ()))
456+ .setImm (RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED);
457+
458+ MRI->replaceRegWith (MI.getOperand (0 ).getReg (), Src->getOperand (0 ).getReg ());
459+ MI.eraseFromParent ();
460+ V0Defs.erase (&MI);
461+
462+ return true ;
463+ }
464+
327465bool RISCVVectorPeephole::runOnMachineFunction (MachineFunction &MF) {
328466 if (skipFunction (MF.getFunction ()))
329467 return false ;
@@ -358,11 +496,12 @@ bool RISCVVectorPeephole::runOnMachineFunction(MachineFunction &MF) {
358496 }
359497
360498 for (MachineBasicBlock &MBB : MF) {
361- for (MachineInstr &MI : MBB) {
499+ for (MachineInstr &MI : make_early_inc_range ( MBB) ) {
362500 Changed |= convertToVLMAX (MI);
363501 Changed |= convertToUnmasked (MI);
364502 Changed |= convertToWholeRegister (MI);
365503 Changed |= convertVMergeToVMv (MI);
504+ Changed |= foldVMV_V_V (MI);
366505 }
367506 }
368507
0 commit comments