diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp index 871fc76d7d988..176dda4a84c75 100644 --- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp +++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp @@ -9,20 +9,26 @@ // This pass performs various peephole optimisations that fold masks into vector // pseudo instructions after instruction selection. // -// Currently it converts -// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew +// It performs the following transforms: +// +// %true = PseudoFOO %passthru, ..., %vl, %sew +// %x = PseudoVMERGE_VVM %passthru, %passthru, %true, %mask, %vl, %sew +// -> +// %x = PseudoFOO_MASK %false, ..., %mask, %vl, %sew +// +// %x = PseudoFOO_MASK ..., %allonesmask, %vl, %sew // -> -// PseudoVMV_V_V %false, %true, %vl, %sew +// %x = PseudoFOO ..., %vl, %sew +// +// %x = PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew +// -> +// %x = PseudoVMV_V_V %false, %true, %vl, %sew // //===---------------------------------------------------------------------===// #include "RISCV.h" #include "RISCVISelDAGToDAG.h" #include "RISCVSubtarget.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetInstrInfo.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" using namespace llvm; @@ -50,6 +56,7 @@ class RISCVFoldMasks : public MachineFunctionPass { private: bool convertToUnmasked(MachineInstr &MI, MachineInstr *MaskDef); + bool foldVMergeIntoOps(MachineInstr &MI, MachineInstr *MaskDef); bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef); bool isAllOnesMask(MachineInstr *MaskDef); @@ -89,6 +96,261 @@ bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskDef) { } } +static unsigned getVMSetForLMul(RISCVII::VLMUL LMUL) { + switch (LMUL) { + case RISCVII::LMUL_F8: + return RISCV::PseudoVMSET_M_B1; + case RISCVII::LMUL_F4: + return RISCV::PseudoVMSET_M_B2; + case RISCVII::LMUL_F2: + return RISCV::PseudoVMSET_M_B4; + case RISCVII::LMUL_1: + return RISCV::PseudoVMSET_M_B8; + case RISCVII::LMUL_2: + return RISCV::PseudoVMSET_M_B16; + case RISCVII::LMUL_4: + return RISCV::PseudoVMSET_M_B32; + case RISCVII::LMUL_8: + return RISCV::PseudoVMSET_M_B64; + case RISCVII::LMUL_RESERVED: + llvm_unreachable("Unexpected LMUL"); + } + llvm_unreachable("Unknown VLMUL enum"); +} + +// Try to fold away VMERGE_VVM instructions. We handle these cases: +// -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction +// folds to a masked TU instruction. VMERGE_VVM must have have merge operand +// same as false operand. +// -Masked TA VMERGE_VVM combined with an unmasked TA instruction fold to a +// masked TA instruction. +// -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to +// masked TU instruction. Both instructions must have the same merge operand. +// VMERGE_VVM must have have merge operand same as false operand. +// Note: The VMERGE_VVM forms above (TA, and TU) refer to the policy implied, +// not the pseudo name. That is, a TA VMERGE_VVM can be either the _TU pseudo +// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo +// form. +bool RISCVFoldMasks::foldVMergeIntoOps(MachineInstr &MI, + MachineInstr *MaskDef) { + MachineOperand *True; + MachineOperand *Merge; + MachineOperand *False; + + const unsigned BaseOpc = RISCV::getRVVMCOpcode(MI.getOpcode()); + // A vmv.v.v is equivalent to a vmerge with an all-ones mask. + if (BaseOpc == RISCV::VMV_V_V) { + Merge = &MI.getOperand(1); + False = &MI.getOperand(1); + True = &MI.getOperand(2); + } else if (BaseOpc == RISCV::VMERGE_VVM) { + Merge = &MI.getOperand(1); + False = &MI.getOperand(2); + True = &MI.getOperand(3); + } else + return false; + + MachineInstr &TrueMI = *MRI->getVRegDef(True->getReg()); + if (TrueMI.getParent() != MI.getParent()) + return false; + + // We require that either merge and false are the same, or that merge + // is undefined. + if (Merge->getReg() != RISCV::NoRegister && + TRI->lookThruCopyLike(Merge->getReg(), MRI) != + TRI->lookThruCopyLike(False->getReg(), MRI)) + return false; + + // N must be the only user of True. + if (!MRI->hasOneUse(True->getReg())) + return false; + + unsigned TrueOpc = TrueMI.getOpcode(); + const MCInstrDesc &TrueMCID = TrueMI.getDesc(); + bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID); + + const bool MIIsMasked = + BaseOpc == RISCV::VMERGE_VVM && !isAllOnesMask(MaskDef); + bool TrueIsMasked = false; + const RISCV::RISCVMaskedPseudoInfo *Info = + RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc); + if (!Info && HasTiedDest) { + Info = RISCV::getMaskedPseudoInfo(TrueOpc); + TrueIsMasked = true; + } + + if (!Info) + return false; + + // When Mask is not a true mask, this transformation is illegal for some + // operations whose results are affected by mask, like viota.m. + if (Info->MaskAffectsResult && MIIsMasked) + return false; + + MachineOperand &TrueMergeOp = TrueMI.getOperand(1); + if (HasTiedDest && TrueMergeOp.getReg() != RISCV::NoRegister) { + // The vmerge instruction must be TU. + // FIXME: This could be relaxed, but we need to handle the policy for the + // resulting op correctly. + if (Merge->getReg() == RISCV::NoRegister) + return false; + // Both the vmerge instruction and the True instruction must have the same + // merge operand. + if (TrueMergeOp.getReg() != RISCV::NoRegister && + TrueMergeOp.getReg() != False->getReg()) + return false; + } + + if (TrueIsMasked) { + assert(HasTiedDest && "Expected tied dest"); + // The vmerge instruction must be TU. + if (Merge->getReg() == RISCV::NoRegister) + return false; + // MI must have an all 1s mask since we're going to keep the mask from the + // True instruction. + // FIXME: Support mask agnostic True instruction which would have an undef + // merge operand. + if (MIIsMasked) + return false; + } + + // Skip if True has side effect. + // TODO: Support vleff and vlsegff. + if (TII->get(TrueOpc).hasUnmodeledSideEffects()) + return false; + + auto GetMinVL = + [](const MachineOperand &LHS, + const MachineOperand &RHS) -> std::optional { + if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() && + LHS.getReg() == RHS.getReg()) + return LHS; + if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel) + return RHS; + if (RHS.isImm() && RHS.getImm() == RISCV::VLMaxSentinel) + return LHS; + if (!LHS.isImm() || !RHS.isImm()) + return std::nullopt; + return LHS.getImm() <= RHS.getImm() ? LHS : RHS; + }; + + // Because MI and True must have the same merge operand (or True's operand is + // implicit_def), the "effective" body is the minimum of their VLs. + const MachineOperand &TrueVL = + TrueMI.getOperand(RISCVII::getVLOpNum(TrueMCID)); + const MachineOperand &VL = MI.getOperand(RISCVII::getVLOpNum(MI.getDesc())); + auto MinVL = GetMinVL(TrueVL, VL); + if (!MinVL) + return false; + bool VLChanged = !MinVL->isIdenticalTo(VL); + + // If we end up changing the VL or mask of True, then we need to make sure it + // doesn't raise any observable fp exceptions, since changing the active + // elements will affect how fflags is set. + if (VLChanged || !TrueIsMasked) + if (TrueMCID.mayRaiseFPException() && + !TrueMI.getFlag(MachineInstr::MIFlag::NoFPExcept)) + return false; + + unsigned MaskedOpc = Info->MaskedPseudo; + const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc); +#ifndef NDEBUG + assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) && + "Expected instructions with mask have policy operand."); + assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(), + MCOI::TIED_TO) == 0 && + "Expected instructions with mask have a tied dest."); +#endif + + // Sink True down to MI so that it can access MI's operands. + assert(!TrueMI.hasImplicitDef()); + bool SawStore = false; + for (MachineBasicBlock::instr_iterator II = TrueMI.getIterator(); + II != MI.getIterator(); II++) { + if (II->mayStore()) { + SawStore = true; + break; + } + } + if (!TrueMI.isSafeToMove(nullptr, SawStore)) + return false; + TrueMI.moveBefore(&MI); + + // Set the merge to the false operand of the merge. + TrueMI.getOperand(1).setReg(False->getReg()); + + bool NeedToMoveOldMask = TrueIsMasked; + // If we're converting it to a masked pseudo, reuse MI's mask. + if (!TrueIsMasked) { + if (BaseOpc == RISCV::VMV_V_V) { + // If MI is a vmv.v.v, it won't have a mask operand. So insert an all-ones + // mask just before True. + unsigned VMSetOpc = + getVMSetForLMul(RISCVII::getLMul(MI.getDesc().TSFlags)); + Register Dest = MRI->createVirtualRegister(&RISCV::VRRegClass); + BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(VMSetOpc), + Dest) + .add(VL) + .add(TrueMI.getOperand(RISCVII::getSEWOpNum(TrueMCID))); + BuildMI(*MI.getParent(), TrueMI, MI.getDebugLoc(), TII->get(RISCV::COPY), + RISCV::V0) + .addReg(Dest); + NeedToMoveOldMask = true; + } + + TrueMI.setDesc(MaskedMCID); + + // TODO: Increment MaskOpIdx by number of explicit defs in tablegen? + unsigned MaskOpIdx = Info->MaskOpIdx + TrueMI.getNumExplicitDefs(); + TrueMI.insert(&TrueMI.getOperand(MaskOpIdx), + MachineOperand::CreateReg(RISCV::V0, false)); + } + + // Update the AVL. + if (MinVL->isReg()) + TrueMI.getOperand(RISCVII::getVLOpNum(MaskedMCID)) + .ChangeToRegister(MinVL->getReg(), false); + else + TrueMI.getOperand(RISCVII::getVLOpNum(MaskedMCID)) + .ChangeToImmediate(MinVL->getImm()); + + // Use a tumu policy, relaxing it to tail agnostic provided that the merge + // operand is undefined. + // + // However, if the VL became smaller than what the vmerge had originally, then + // elements past VL that were previously in the vmerge's body will have moved + // to the tail. In that case we always need to use tail undisturbed to + // preserve them. + uint64_t Policy = (Merge->getReg() == RISCV::NoRegister && !VLChanged) + ? RISCVII::TAIL_AGNOSTIC + : RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED; + TrueMI.getOperand(RISCVII::getVecPolicyOpNum(MaskedMCID)).setImm(Policy); + + const TargetRegisterClass *V0RC = + TII->getRegClass(MaskedMCID, 0, TRI, *MI.getMF()); + + // The destination and passthru can no longer be in V0. + MRI->constrainRegClass(TrueMI.getOperand(0).getReg(), V0RC); + Register PassthruReg = TrueMI.getOperand(1).getReg(); + if (PassthruReg != RISCV::NoRegister) + MRI->constrainRegClass(PassthruReg, V0RC); + + MRI->replaceRegWith(MI.getOperand(0).getReg(), TrueMI.getOperand(0).getReg()); + + // We need to move the old mask copy to after MI if: + // - TrueMI is masked and we are using its mask instead + // - We created a new all ones mask that clobbers V0 + if (NeedToMoveOldMask && MaskDef) { + assert(MaskDef->getParent() == MI.getParent()); + MaskDef->removeFromParent(); + MI.getParent()->insertAfter(MI.getIterator(), MaskDef); + } + + MI.eraseFromParent(); + + return true; +} + // Transform (VMERGE_VVM_ false, false, true, allones, vl, sew) to // (VMV_V_V_ false, true, vl, sew). It may decrease uses of VMSET. bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) { @@ -202,6 +464,13 @@ bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) { // on each pseudo. MachineInstr *CurrentV0Def; for (MachineBasicBlock &MBB : MF) { + CurrentV0Def = nullptr; + for (MachineInstr &MI : make_early_inc_range(MBB)) { + Changed |= foldVMergeIntoOps(MI, CurrentV0Def); + if (MI.definesRegister(RISCV::V0, TRI)) + CurrentV0Def = &MI; + } + CurrentV0Def = nullptr; for (MachineInstr &MI : MBB) { Changed |= convertToUnmasked(MI, CurrentV0Def); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index ae7f1743b5239..07748ace31a81 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -160,8 +160,6 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() { CurDAG->setRoot(Dummy.getValue()); - MadeChange |= doPeepholeMergeVVMFold(); - // After we're done with everything else, convert IMPLICIT_DEF // passthru operands to NoRegister. This is required to workaround // an optimization deficiency in MachineCSE. This really should @@ -3432,307 +3430,6 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) { return true; } -static bool IsVMerge(SDNode *N) { - return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM; -} - -static bool IsVMv(SDNode *N) { - return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V; -} - -static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) { - switch (LMUL) { - case RISCVII::LMUL_F8: - return RISCV::PseudoVMSET_M_B1; - case RISCVII::LMUL_F4: - return RISCV::PseudoVMSET_M_B2; - case RISCVII::LMUL_F2: - return RISCV::PseudoVMSET_M_B4; - case RISCVII::LMUL_1: - return RISCV::PseudoVMSET_M_B8; - case RISCVII::LMUL_2: - return RISCV::PseudoVMSET_M_B16; - case RISCVII::LMUL_4: - return RISCV::PseudoVMSET_M_B32; - case RISCVII::LMUL_8: - return RISCV::PseudoVMSET_M_B64; - case RISCVII::LMUL_RESERVED: - llvm_unreachable("Unexpected LMUL"); - } - llvm_unreachable("Unknown VLMUL enum"); -} - -// Try to fold away VMERGE_VVM instructions. We handle these cases: -// -Masked TU VMERGE_VVM combined with an unmasked TA instruction instruction -// folds to a masked TU instruction. VMERGE_VVM must have have merge operand -// same as false operand. -// -Masked TA VMERGE_VVM combined with an unmasked TA instruction fold to a -// masked TA instruction. -// -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to -// masked TU instruction. Both instructions must have the same merge operand. -// VMERGE_VVM must have have merge operand same as false operand. -// Note: The VMERGE_VVM forms above (TA, and TU) refer to the policy implied, -// not the pseudo name. That is, a TA VMERGE_VVM can be either the _TU pseudo -// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo -// form. -bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { - SDValue Merge, False, True, VL, Mask, Glue; - // A vmv.v.v is equivalent to a vmerge with an all-ones mask. - if (IsVMv(N)) { - Merge = N->getOperand(0); - False = N->getOperand(0); - True = N->getOperand(1); - VL = N->getOperand(2); - // A vmv.v.v won't have a Mask or Glue, instead we'll construct an all-ones - // mask later below. - } else { - assert(IsVMerge(N)); - Merge = N->getOperand(0); - False = N->getOperand(1); - True = N->getOperand(2); - Mask = N->getOperand(3); - VL = N->getOperand(4); - // We always have a glue node for the mask at v0. - Glue = N->getOperand(N->getNumOperands() - 1); - } - assert(!Mask || cast(Mask)->getReg() == RISCV::V0); - assert(!Glue || Glue.getValueType() == MVT::Glue); - - // We require that either merge and false are the same, or that merge - // is undefined. - if (Merge != False && !isImplicitDef(Merge)) - return false; - - assert(True.getResNo() == 0 && - "Expect True is the first output of an instruction."); - - // Need N is the exactly one using True. - if (!True.hasOneUse()) - return false; - - if (!True.isMachineOpcode()) - return false; - - unsigned TrueOpc = True.getMachineOpcode(); - const MCInstrDesc &TrueMCID = TII->get(TrueOpc); - uint64_t TrueTSFlags = TrueMCID.TSFlags; - bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID); - - bool IsMasked = false; - const RISCV::RISCVMaskedPseudoInfo *Info = - RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc); - if (!Info && HasTiedDest) { - Info = RISCV::getMaskedPseudoInfo(TrueOpc); - IsMasked = true; - } - - if (!Info) - return false; - - // When Mask is not a true mask, this transformation is illegal for some - // operations whose results are affected by mask, like viota.m. - if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue)) - return false; - - if (HasTiedDest && !isImplicitDef(True->getOperand(0))) { - // The vmerge instruction must be TU. - // FIXME: This could be relaxed, but we need to handle the policy for the - // resulting op correctly. - if (isImplicitDef(Merge)) - return false; - SDValue MergeOpTrue = True->getOperand(0); - // Both the vmerge instruction and the True instruction must have the same - // merge operand. - if (False != MergeOpTrue) - return false; - } - - if (IsMasked) { - assert(HasTiedDest && "Expected tied dest"); - // The vmerge instruction must be TU. - if (isImplicitDef(Merge)) - return false; - // The vmerge instruction must have an all 1s mask since we're going to keep - // the mask from the True instruction. - // FIXME: Support mask agnostic True instruction which would have an - // undef merge operand. - if (Mask && !usesAllOnesMask(Mask, Glue)) - return false; - } - - // Skip if True has side effect. - // TODO: Support vleff and vlsegff. - if (TII->get(TrueOpc).hasUnmodeledSideEffects()) - return false; - - // The last operand of a masked instruction may be glued. - bool HasGlueOp = True->getGluedNode() != nullptr; - - // The chain operand may exist either before the glued operands or in the last - // position. - unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1; - bool HasChainOp = - True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other; - - if (HasChainOp) { - // Avoid creating cycles in the DAG. We must ensure that none of the other - // operands depend on True through it's Chain. - SmallVector LoopWorklist; - SmallPtrSet Visited; - LoopWorklist.push_back(False.getNode()); - if (Mask) - LoopWorklist.push_back(Mask.getNode()); - LoopWorklist.push_back(VL.getNode()); - if (Glue) - LoopWorklist.push_back(Glue.getNode()); - if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist)) - return false; - } - - // The vector policy operand may be present for masked intrinsics - bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags); - unsigned TrueVLIndex = - True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2; - SDValue TrueVL = True.getOperand(TrueVLIndex); - SDValue SEW = True.getOperand(TrueVLIndex + 1); - - auto GetMinVL = [](SDValue LHS, SDValue RHS) { - if (LHS == RHS) - return LHS; - if (isAllOnesConstant(LHS)) - return RHS; - if (isAllOnesConstant(RHS)) - return LHS; - auto *CLHS = dyn_cast(LHS); - auto *CRHS = dyn_cast(RHS); - if (!CLHS || !CRHS) - return SDValue(); - return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS; - }; - - // Because N and True must have the same merge operand (or True's operand is - // implicit_def), the "effective" body is the minimum of their VLs. - SDValue OrigVL = VL; - VL = GetMinVL(TrueVL, VL); - if (!VL) - return false; - - // If we end up changing the VL or mask of True, then we need to make sure it - // doesn't raise any observable fp exceptions, since changing the active - // elements will affect how fflags is set. - if (TrueVL != VL || !IsMasked) - if (mayRaiseFPException(True.getNode()) && - !True->getFlags().hasNoFPExcept()) - return false; - - SDLoc DL(N); - - // From the preconditions we checked above, we know the mask and thus glue - // for the result node will be taken from True. - if (IsMasked) { - Mask = True->getOperand(Info->MaskOpIdx); - Glue = True->getOperand(True->getNumOperands() - 1); - assert(Glue.getValueType() == MVT::Glue); - } - // If we end up using the vmerge mask the vmerge is actually a vmv.v.v, create - // an all-ones mask to use. - else if (IsVMv(N)) { - unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags; - unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags)); - ElementCount EC = N->getValueType(0).getVectorElementCount(); - MVT MaskVT = MVT::getVectorVT(MVT::i1, EC); - - SDValue AllOnesMask = - SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0); - SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, - RISCV::V0, AllOnesMask, SDValue()); - Mask = CurDAG->getRegister(RISCV::V0, MaskVT); - Glue = MaskCopy.getValue(1); - } - - unsigned MaskedOpc = Info->MaskedPseudo; -#ifndef NDEBUG - const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc); - assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) && - "Expected instructions with mask have policy operand."); - assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(), - MCOI::TIED_TO) == 0 && - "Expected instructions with mask have a tied dest."); -#endif - - // Use a tumu policy, relaxing it to tail agnostic provided that the merge - // operand is undefined. - // - // However, if the VL became smaller than what the vmerge had originally, then - // elements past VL that were previously in the vmerge's body will have moved - // to the tail. In that case we always need to use tail undisturbed to - // preserve them. - bool MergeVLShrunk = VL != OrigVL; - uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk) - ? RISCVII::TAIL_AGNOSTIC - : /*TUMU*/ 0; - SDValue PolicyOp = - CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()); - - - SmallVector Ops; - Ops.push_back(False); - - const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags); - const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode; - assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx); - Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd); - - Ops.push_back(Mask); - - // For unmasked "VOp" with rounding mode operand, that is interfaces like - // (..., rm, vl) or (..., rm, vl, policy). - // Its masked version is (..., vm, rm, vl, policy). - // Check the rounding mode pseudo nodes under RISCVInstrInfoVPseudos.td - if (HasRoundingMode) - Ops.push_back(True->getOperand(TrueVLIndex - 1)); - - Ops.append({VL, SEW, PolicyOp}); - - // Result node should have chain operand of True. - if (HasChainOp) - Ops.push_back(True.getOperand(TrueChainOpIdx)); - - // Add the glue for the CopyToReg of mask->v0. - Ops.push_back(Glue); - - MachineSDNode *Result = - CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops); - Result->setFlags(True->getFlags()); - - if (!cast(True)->memoperands_empty()) - CurDAG->setNodeMemRefs(Result, cast(True)->memoperands()); - - // Replace vmerge.vvm node by Result. - ReplaceUses(SDValue(N, 0), SDValue(Result, 0)); - - // Replace another value of True. E.g. chain and VL. - for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx) - ReplaceUses(True.getValue(Idx), SDValue(Result, Idx)); - - return true; -} - -bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() { - bool MadeChange = false; - SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); - - while (Position != CurDAG->allnodes_begin()) { - SDNode *N = &*--Position; - if (N->use_empty() || !N->isMachineOpcode()) - continue; - - if (IsVMerge(N) || IsVMv(N)) - MadeChange |= performCombineVMergeAndVOps(N); - } - return MadeChange; -} - /// If our passthru is an implicit_def, use noreg instead. This side /// steps issues with MachineCSE not being able to CSE expressions with /// IMPLICIT_DEF operands while preserving the semantic intent. See diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 77e174135a599..f7551fe4d29a6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -188,9 +188,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { private: bool doPeepholeSExtW(SDNode *Node); bool doPeepholeMaskedRVV(MachineSDNode *Node); - bool doPeepholeMergeVVMFold(); bool doPeepholeNoRegPassThru(); - bool performCombineVMergeAndVOps(SDNode *N); }; namespace RISCV { diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-fold-vmerge.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-fold-vmerge.mir new file mode 100644 index 0000000000000..b9e9111198606 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-fold-vmerge.mir @@ -0,0 +1,112 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-fold-masks \ +# RUN: -verify-machineinstrs | FileCheck %s + +--- +name: vmerge_unmasked +body: | + bb.0: + liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-LABEL: name: vmerge_unmasked + ; CHECK: liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %false:vrnov0 = COPY $v8 + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5 /* e32 */, 1 /* ta, mu */ + %avl:gprnox0 = COPY $x1 + %false:vr = COPY $v8 + %true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5, 3 + %mask:vmv0 = COPY $v0 + $v0 = COPY %mask + %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5 +... +--- +name: vmerge_masked +body: | + bb.0: + liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-LABEL: name: vmerge_masked + ; CHECK: liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %false:vrnov0 = COPY $v8 + ; CHECK-NEXT: %addmask:vmv0 = COPY $v0 + ; CHECK-NEXT: $v0 = COPY %addmask + ; CHECK-NEXT: %mergemask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */ + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v0 = COPY %mergemask + %avl:gprnox0 = COPY $x1 + %false:vrnov0 = COPY $v8 + %addmask:vmv0 = COPY $v0 + $v0 = COPY %addmask + %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5, 0 + %mergemask:vmv0 = PseudoVMSET_M_B8 %avl, 5 + $v0 = COPY %mergemask + %x:vrnov0 = PseudoVMERGE_VVM_M1 %false, %false, %true, $v0, %avl, 5 +... +--- +name: vmv_unmasked +body: | + bb.0: + liveins: $x1, $v8, $v9, $v10 + ; CHECK-LABEL: name: vmv_unmasked + ; CHECK: liveins: $x1, $v8, $v9, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %false:vr = COPY $v8 + ; CHECK-NEXT: [[PseudoVMSET_M_B8_:%[0-9]+]]:vr = PseudoVMSET_M_B8 %avl, 5 /* e32 */ + ; CHECK-NEXT: $v0 = COPY [[PseudoVMSET_M_B8_]] + ; CHECK-NEXT: %true:vr = PseudoVADD_VV_M1 %false, $v9, $v10, %avl, 5 /* e32 */, 0 /* tu, mu */ + %avl:gprnox0 = COPY $x1 + %false:vr = COPY $v8 + %true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5, 3 + %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5, 0 +... +--- +name: vmv_masked +body: | + bb.0: + liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-LABEL: name: vmv_masked + ; CHECK: liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %false:vrnov0 = COPY $v8 + ; CHECK-NEXT: %addmask:vmv0 = COPY $v0 + ; CHECK-NEXT: %true:vrnov0 = PseudoVADD_VV_M1_MASK %false, $v9, $v10, $v0, %avl, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: $v0 = COPY %addmask + %avl:gprnox0 = COPY $x1 + %false:vrnov0 = COPY $v8 + %addmask:vmv0 = COPY $v0 + $v0 = COPY %addmask + %true:vrnov0 = PseudoVADD_VV_M1_MASK $noreg, $v9, $v10, $v0, %avl, 5, 3 + %x:vrnov0 = PseudoVMV_V_V_M1 %false, %true, %avl, 5, 0 +... +--- +# Shouldn't fold this because vadd is in another BB +name: different_bb +body: | + ; CHECK-LABEL: name: different_bb + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $x1, $v0, $v8, $v9, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 + ; CHECK-NEXT: %false:vr = COPY $v8 + ; CHECK-NEXT: %true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5 /* e32 */, 3 /* ta, ma */ + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: %mask:vmv0 = COPY $v0 + ; CHECK-NEXT: $v0 = COPY %mask + ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5 /* e32 */ + bb.0: + liveins: $x1, $v0, $v8, $v9, $v10 + %avl:gprnox0 = COPY $x1 + %false:vr = COPY $v8 + %true:vr = PseudoVADD_VV_M1 $noreg, $v9, $v10, %avl, 5, 3 + bb.1: + %mask:vmv0 = COPY $v0 + $v0 = COPY %mask + %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5 diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll index d612298bf50e7..4f88806054dfb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc < %s -mtriple=riscv64 -mattr=+v -stop-after=finalize-isel | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+v -stop-after=riscv-fold-masks | FileCheck %s declare @llvm.vp.merge.nxv2i32(, , , i32) declare @llvm.vp.select.nxv2i32(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index c639f092444fc..5c8b02237382f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -952,22 +952,20 @@ define @vpselect_trunc( %passthru, @llvm.riscv.vle.nxv32i16.i64( undef, * null, i64 1)