@@ -3792,8 +3792,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
37923792 State.setDebugLocFrom (I->getDebugLoc ());
37933793
37943794 VPValue *LoopExitInstDef = PhiR->getBackedgeValue ();
3795- // This is the vector-clone of the value that leaves the loop.
3796- Type *VecTy = State.get (LoopExitInstDef, 0 )->getType ();
37973795
37983796 // Before each round, move the insertion point right between
37993797 // the PHIs and the values we are going to write.
@@ -3805,10 +3803,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
38053803 State.setDebugLocFrom (LoopExitInst->getDebugLoc ());
38063804
38073805 Type *PhiTy = OrigPhi->getType ();
3808-
3809- VPBasicBlock *LatchVPBB =
3810- PhiR->getParent ()->getEnclosingLoopRegion ()->getExitingBasicBlock ();
3811- BasicBlock *VectorLoopLatch = State.CFG .VPBB2IRBB [LatchVPBB];
38123806 // If tail is folded by masking, the vector value to leave the loop should be
38133807 // a Select choosing between the vectorized LoopExitInst and vectorized Phi,
38143808 // instead of the former. For an inloop reduction the reduction will already
@@ -3834,23 +3828,12 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
38343828 // then extend the loop exit value to enable InstCombine to evaluate the
38353829 // entire expression in the smaller type.
38363830 if (VF.isVector () && PhiTy != RdxDesc.getRecurrenceType ()) {
3837- assert (!PhiR->isInLoop () && " Unexpected truncated inloop reduction!" );
3838- Type *RdxVecTy = VectorType::get (RdxDesc.getRecurrenceType (), VF);
3839- Builder.SetInsertPoint (VectorLoopLatch->getTerminator ());
3840- for (unsigned Part = 0 ; Part < UF; ++Part) {
3841- Value *Trunc = Builder.CreateTrunc (RdxParts[Part], RdxVecTy);
3842- Value *Extnd = RdxDesc.isSigned () ? Builder.CreateSExt (Trunc, VecTy)
3843- : Builder.CreateZExt (Trunc, VecTy);
3844- for (User *U : llvm::make_early_inc_range (RdxParts[Part]->users ()))
3845- if (U != Trunc) {
3846- U->replaceUsesOfWith (RdxParts[Part], Extnd);
3847- RdxParts[Part] = Extnd;
3848- }
3849- }
38503831 Builder.SetInsertPoint (LoopMiddleBlock,
38513832 LoopMiddleBlock->getFirstInsertionPt ());
3852- for (unsigned Part = 0 ; Part < UF; ++Part)
3833+ Type *RdxVecTy = VectorType::get (RdxDesc.getRecurrenceType (), VF);
3834+ for (unsigned Part = 0 ; Part < UF; ++Part) {
38533835 RdxParts[Part] = Builder.CreateTrunc (RdxParts[Part], RdxVecTy);
3836+ }
38543837 }
38553838
38563839 // Reduce all of the unrolled parts into a single vector.
@@ -9155,35 +9138,55 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
91559138 PreviousLink = RedRecipe;
91569139 }
91579140 }
9158-
9159- // If tail is folded by masking, introduce selects between the phi
9160- // and the live-out instruction of each reduction, at the beginning of the
9161- // dedicated latch block.
9162- if (CM.foldTailByMasking ()) {
91639141 Builder.setInsertPoint (&*LatchVPBB->begin ());
91649142 for (VPRecipeBase &R :
91659143 Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
9166- VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9167- if (!PhiR || PhiR->isInLoop ())
9168- continue ;
9169- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9144+ VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9145+ if (!PhiR || PhiR->isInLoop ())
9146+ continue ;
9147+
9148+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9149+ auto *Result = PhiR->getBackedgeValue ()->getDefiningRecipe ();
9150+ // If tail is folded by masking, introduce selects between the phi
9151+ // and the live-out instruction of each reduction, at the beginning of the
9152+ // dedicated latch block.
9153+ if (CM.foldTailByMasking ()) {
91709154 VPValue *Cond =
91719155 RecipeBuilder.createBlockInMask (OrigLoop->getHeader (), *Plan);
91729156 VPValue *Red = PhiR->getBackedgeValue ();
91739157 assert (Red->getDefiningRecipe ()->getParent () != LatchVPBB &&
91749158 " reduction recipe must be defined before latch" );
91759159 FastMathFlags FMFs = RdxDesc.getFastMathFlags ();
91769160 Type *PhiTy = PhiR->getOperand (0 )->getLiveInIRValue ()->getType ();
9177- auto *Select =
9161+ Result =
91789162 PhiTy->isFloatingPointTy ()
91799163 ? new VPInstruction (Instruction::Select, {Cond, Red, PhiR}, FMFs)
91809164 : new VPInstruction (Instruction::Select, {Cond, Red, PhiR});
9181- Select ->insertBefore (&*Builder.getInsertPoint ());
9165+ Result ->insertBefore (&*Builder.getInsertPoint ());
91829166 if (PreferPredicatedReductionSelect ||
91839167 TTI.preferPredicatedReductionSelect (
91849168 PhiR->getRecurrenceDescriptor ().getOpcode (), PhiTy,
91859169 TargetTransformInfo::ReductionFlags ()))
9186- PhiR->setOperand (1 , Select);
9170+ PhiR->setOperand (1 , Result->getVPSingleValue ());
9171+ }
9172+ // If the vector reduction can be performed in a smaller type, we truncate
9173+ // then extend the loop exit value to enable InstCombine to evaluate the
9174+ // entire expression in the smaller type.
9175+ Type *PhiTy = PhiR->getStartValue ()->getLiveInIRValue ()->getType ();
9176+ if (PhiTy != RdxDesc.getRecurrenceType ()) {
9177+ assert (!PhiR->isInLoop () && " Unexpected truncated inloop reduction!" );
9178+ Type *RdxTy = RdxDesc.getRecurrenceType ();
9179+ auto *Trunc = new VPWidenCastRecipe (Instruction::Trunc,
9180+ Result->getVPSingleValue (), RdxTy);
9181+ auto *Extnd =
9182+ RdxDesc.isSigned ()
9183+ ? new VPWidenCastRecipe (Instruction::SExt, Trunc, PhiTy)
9184+ : new VPWidenCastRecipe (Instruction::ZExt, Trunc, PhiTy);
9185+
9186+ Trunc->insertAfter (Result);
9187+ Extnd->insertAfter (Trunc);
9188+ Result->getVPSingleValue ()->replaceAllUsesWith (Extnd);
9189+ Trunc->setOperand (0 , Result->getVPSingleValue ());
91879190 }
91889191 }
91899192
0 commit comments