diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f1391ae93c8630..0e11e8704db2f2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -842,123 +842,8 @@ class InstructionsState {
   static InstructionsState invalid() { return {nullptr, nullptr}; }
 };
 
-struct InterchangeableInstruction {
-  unsigned Opcode;
-  SmallVector<Value *> Ops;
-  template <class... ArgTypes>
-  InterchangeableInstruction(unsigned Opcode, ArgTypes &&...Args)
-      : Opcode(Opcode), Ops{std::forward<ArgTypes>(Args)...} {}
-};
-
-bool operator<(const InterchangeableInstruction &LHS,
-               const InterchangeableInstruction &RHS) {
-  return LHS.Opcode < RHS.Opcode;
-}
-
 } // end anonymous namespace
 
-/// \returns a sorted list of interchangeable instructions by instruction opcode
-/// that \p I can be converted to.
-/// e.g.,
-/// x << y -> x * (2^y)
-/// x << 1 -> x * 2
-/// x << 0 -> x * 1 -> x - 0 -> x + 0 -> x & 11...1 -> x | 0
-/// x * 0 -> x & 0
-/// x * -1 -> 0 - x
-/// TODO: support more patterns
-static SmallVector<InterchangeableInstruction>
-getInterchangeableInstruction(Instruction *I) {
-  // PII = Possible Interchangeable Instruction
-  SmallVector<InterchangeableInstruction> PII;
-  unsigned Opcode = I->getOpcode();
-  PII.emplace_back(Opcode, I->operands());
-  if (!is_contained({Instruction::Shl, Instruction::Mul, Instruction::Sub,
-                     Instruction::Add},
-                    Opcode))
-    return PII;
-  Constant *C;
-  if (match(I, m_BinOp(m_Value(), m_Constant(C)))) {
-    ConstantInt *V = nullptr;
-    if (auto *CI = dyn_cast<ConstantInt>(C)) {
-      V = CI;
-    } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
-      if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
-        V = CI;
-    }
-    if (!V)
-      return PII;
-    Value *Op0 = I->getOperand(0);
-    Type *Op1Ty = I->getOperand(1)->getType();
-    const APInt &Op1Int = V->getValue();
-    Constant *Zero =
-        ConstantInt::get(Op1Ty, APInt::getZero(Op1Int.getBitWidth()));
-    Constant *UnsignedMax =
-        ConstantInt::get(Op1Ty, APInt::getMaxValue(Op1Int.getBitWidth()));
-    switch (Opcode) {
-    case Instruction::Shl: {
-      PII.emplace_back(Instruction::Mul, Op0,
-                       ConstantInt::get(Op1Ty, 1 << Op1Int.getZExtValue()));
-      if (Op1Int.isZero()) {
-        PII.emplace_back(Instruction::Sub, Op0, Zero);
-        PII.emplace_back(Instruction::Add, Op0, Zero);
-        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
-        PII.emplace_back(Instruction::Or, Op0, Zero);
-      }
-      break;
-    }
-    case Instruction::Mul: {
-      if (Op1Int.isOne()) {
-        PII.emplace_back(Instruction::Sub, Op0, Zero);
-        PII.emplace_back(Instruction::Add, Op0, Zero);
-        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
-        PII.emplace_back(Instruction::Or, Op0, Zero);
-      } else if (Op1Int.isZero()) {
-        PII.emplace_back(Instruction::And, Op0, Zero);
-      } else if (Op1Int.isAllOnes()) {
-        PII.emplace_back(Instruction::Sub, Zero, Op0);
-      }
-      break;
-    }
-    case Instruction::Sub:
-      if (Op1Int.isZero()) {
-        PII.emplace_back(Instruction::Add, Op0, Zero);
-        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
-        PII.emplace_back(Instruction::Or, Op0, Zero);
-      }
-      break;
-    case Instruction::Add:
-      if (Op1Int.isZero()) {
-        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
-        PII.emplace_back(Instruction::Or, Op0, Zero);
-      }
-      break;
-    }
-  }
-  // std::set_intersection requires a sorted range.
-  sort(PII);
-  return PII;
-}
-
-/// \returns the Op and operands which \p I convert to.
-static std::pair<Instruction *, SmallVector<Value *>>
-getInterchangeableInstruction(Instruction *I, Instruction *MainOp,
-                              Instruction *AltOp) {
-  SmallVector<InterchangeableInstruction> IIList =
-      getInterchangeableInstruction(I);
-  const auto *Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
-    return II.Opcode == MainOp->getOpcode();
-  });
-  if (Iter == IIList.end()) {
-    Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
-      return II.Opcode == AltOp->getOpcode();
-    });
-    assert(Iter != IIList.end() &&
-           "Cannot find an interchangeable instruction.");
-    return std::make_pair(AltOp, Iter->Ops);
-  }
-  return std::make_pair(MainOp, Iter->Ops);
-}
-
 /// \returns true if \p Opcode is allowed as part of the main/alternate
 /// instruction for SLP vectorization.
 ///
@@ -1072,22 +957,6 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
       return InstructionsState::invalid();
   }
   bool AnyPoison = InstCnt != VL.size();
-  // Currently, this is only used for binary ops.
-  // TODO: support all instructions
-  SmallVector<InterchangeableInstruction> InterchangeableOpcode =
-      getInterchangeableInstruction(cast<Instruction>(V));
-  SmallVector<InterchangeableInstruction> AlternateInterchangeableOpcode;
-  auto UpdateInterchangeableOpcode =
-      [](SmallVector<InterchangeableInstruction> &LHS,
-         ArrayRef<InterchangeableInstruction> RHS) {
-        SmallVector<InterchangeableInstruction> NewInterchangeableOpcode;
-        std::set_intersection(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
-                              std::back_inserter(NewInterchangeableOpcode));
-        if (NewInterchangeableOpcode.empty())
-          return false;
-        LHS.swap(NewInterchangeableOpcode);
-        return true;
-      };
   for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
     auto *I = dyn_cast<Instruction>(VL[Cnt]);
     if (!I)
@@ -1100,32 +969,14 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
         return InstructionsState::invalid();
     unsigned InstOpcode = I->getOpcode();
     if (IsBinOp && isa<BinaryOperator>(I)) {
-      SmallVector<InterchangeableInstruction> ThisInterchangeableOpcode(
-          getInterchangeableInstruction(I));
-      if (UpdateInterchangeableOpcode(InterchangeableOpcode,
-                                      ThisInterchangeableOpcode))
+      if (InstOpcode == Opcode || InstOpcode == AltOpcode)
         continue;
-      if (AlternateInterchangeableOpcode.empty()) {
-        InterchangeableOpcode.erase(
-            remove_if(InterchangeableOpcode,
-                      [](const InterchangeableInstruction &I) {
-                        return !isValidForAlternation(I.Opcode);
-                      }),
-            InterchangeableOpcode.end());
-        ThisInterchangeableOpcode.erase(
-            remove_if(ThisInterchangeableOpcode,
-                      [](const InterchangeableInstruction &I) {
-                        return !isValidForAlternation(I.Opcode);
-                      }),
-            ThisInterchangeableOpcode.end());
-        if (InterchangeableOpcode.empty() || ThisInterchangeableOpcode.empty())
-          return InstructionsState::invalid();
-        AlternateInterchangeableOpcode.swap(ThisInterchangeableOpcode);
+      if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
+          isValidForAlternation(Opcode)) {
+        AltOpcode = InstOpcode;
+        AltIndex = Cnt;
         continue;
       }
-      if (UpdateInterchangeableOpcode(AlternateInterchangeableOpcode,
-                                      ThisInterchangeableOpcode))
-        continue;
     } else if (IsCastOp && isa<CastInst>(I)) {
       Value *Op0 = IBase->getOperand(0);
       Type *Ty0 = Op0->getType();
@@ -1226,24 +1077,6 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
     return InstructionsState::invalid();
   }
 
-  if (IsBinOp) {
-    auto FindOp = [&](ArrayRef<InterchangeableInstruction> CandidateOp) {
-      for (Value *V : VL) {
-        if (isa<PoisonValue>(V))
-          continue;
-        for (const InterchangeableInstruction &I : CandidateOp)
-          if (cast<Instruction>(V)->getOpcode() == I.Opcode)
-            return cast<Instruction>(V);
-      }
-      llvm_unreachable(
-          "Cannot find the candidate instruction for InstructionsState.");
-    };
-    Instruction *MainOp = FindOp(InterchangeableOpcode);
-    Instruction *AltOp = AlternateInterchangeableOpcode.empty()
-                             ? MainOp
-                             : FindOp(AlternateInterchangeableOpcode);
-    return InstructionsState(MainOp, AltOp);
-  }
   return InstructionsState(cast<Instruction>(V), cast<Instruction>(VL[AltIndex]));
 }
 
@@ -2574,46 +2407,42 @@ class BoUpSLP {
   }
 
   /// Go through the instructions in VL and append their operands.
-  void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *MainOp,
-                          Instruction *AltOp) {
+  void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *VL0) {
     assert(!VL.empty() && "Bad VL");
     assert((empty() || VL.size() == getNumLanes()) &&
            "Expected same number of lanes");
     // IntrinsicInst::isCommutative returns true if swapping the first "two"
     // arguments to the intrinsic produces the same result.
    constexpr unsigned IntrinsicNumOperands = 2;
-    unsigned NumOperands = MainOp->getNumOperands();
-    ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
+    unsigned NumOperands = VL0->getNumOperands();
+    ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
     OpsVec.resize(NumOperands);
     unsigned NumLanes = VL.size();
-    for (unsigned OpIdx : seq<unsigned>(NumOperands))
+    for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
       OpsVec[OpIdx].resize(NumLanes);
-    for (auto [Lane, V] : enumerate(VL)) {
-      assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
-             "Expected instruction or poison value");
-      if (isa<PoisonValue>(V)) {
-        for (unsigned OpIdx : seq<unsigned>(NumOperands))
+      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+        assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
+               "Expected instruction or poison value");
+        // Our tree has just 3 nodes: the root and two operands.
+        // It is therefore trivial to get the APO. We only need to check the
+        // opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
+        // RHS operand. The LHS operand of both add and sub is never attached
+        // to an inversese operation in the linearized form, therefore its APO
+        // is false. The RHS is true only if VL[Lane] is an inverse operation.

+        // Since operand reordering is performed on groups of commutative
+        // operations or alternating sequences (e.g., +, -), we can safely
+        // tell the inverse operations by checking commutativity.
+        if (isa<PoisonValue>(VL[Lane])) {
           OpsVec[OpIdx][Lane] = {
-              PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true,
+              PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true,
               false};
-        continue;
-      }
-      auto [SelectedOp, Ops] =
-          getInterchangeableInstruction(cast<Instruction>(V), MainOp, AltOp);
-      // Our tree has just 3 nodes: the root and two operands.
-      // It is therefore trivial to get the APO. We only need to check the
-      // opcode of V and whether the operand at OpIdx is the LHS or RHS
-      // operand. The LHS operand of both add and sub is never attached to an
-      // inversese operation in the linearized form, therefore its APO is
-      // false. The RHS is true only if V is an inverse operation.

-      // Since operand reordering is performed on groups of commutative
-      // operations or alternating sequences (e.g., +, -), we can safely
-      // tell the inverse operations by checking commutativity.
-      bool IsInverseOperation = !isCommutative(cast<Instruction>(SelectedOp));
-      for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
+          continue;
+        }
+        bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
         bool APO = (OpIdx == 0) ? false : IsInverseOperation;
-        OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
+        OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
+                               APO, false};
       }
     }
   }
@@ -2720,12 +2549,11 @@ class BoUpSLP {
 
 public:
   /// Initialize with all the operands of the instruction vector \p RootVL.
-  VLOperands(ArrayRef<Value *> RootVL, Instruction *MainOp,
-             Instruction *AltOp, const BoUpSLP &R)
+  VLOperands(ArrayRef<Value *> RootVL, Instruction *VL0, const BoUpSLP &R)
       : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
-        L(R.LI->getLoopFor(MainOp->getParent())) {
+        L(R.LI->getLoopFor((VL0->getParent()))) {
     // Append all the operands of RootVL.
-    appendOperandsOfVL(RootVL, MainOp, AltOp);
+    appendOperandsOfVL(RootVL, VL0);
   }
 
   /// \Returns a value vector with the operands across all lanes for the
@@ -3517,7 +3345,7 @@ class BoUpSLP {
 
     /// Set this bundle's operand from Scalars.
     void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
-      VLOperands Ops(Scalars, MainOp, AltOp, R);
+      VLOperands Ops(Scalars, MainOp, R);
       if (RequireReorder)
         Ops.reorder();
       for (unsigned I : seq<unsigned>(MainOp->getNumOperands()))
@@ -8733,7 +8561,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
 
       ValueList Left, Right;
-      VLOperands Ops(VL, VL0, S.getAltOp(), *this);
+      VLOperands Ops(VL, VL0, *this);
       if (cast<CmpInst>(VL0)->isCommutative()) {
         // Commutative predicate - collect + sort operands of the instructions
         // so that each side is more likely to have the same opcode.
@@ -15791,7 +15619,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
 
       Value *V = Builder.CreateBinOp(
          static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
-      propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());
+      propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
       if (auto *I = dyn_cast<Instruction>(V)) {
         V = ::propagateMetadata(I, E->Scalars);
         // Drop nuw flags for abs(sub(commutative), true).
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
index c65df26fa0d5cc..feb4ad865f3147 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
 ;
 ; POW2-ONLY-LABEL: @store_try_reorder(
 ; POW2-ONLY-NEXT:  entry:
-; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
-; POW2-ONLY-NEXT:    [[ADD216:%.*]] = sub i32 0, 0
-; POW2-ONLY-NEXT:    [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
-; POW2-ONLY-NEXT:    store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
+; POW2-ONLY-NEXT:    [[ADD:%.*]] = add i32 0, 0
+; POW2-ONLY-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
+; POW2-ONLY-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
+; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
 ; POW2-ONLY-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
index 74d7f1c91f3bff..fd3d4ab80b29cc 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll
@@ -7,18 +7,19 @@ define void @test(ptr %a, i64 %0) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> , i64 [[TMP0]], i32 0
 ; CHECK-NEXT:    br label %[[BB:.*]]
 ; CHECK:       [[BB]]:
-; CHECK-NEXT:    [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]],
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
+;
CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison) -; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[A]], align 8 -; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, ptr [[A]], align 8 +; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison) +; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP8]], [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP7]], [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP9]], [[TMP11]] -; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP12]], ptr align 8 [[TMP8]], i64 -8, <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> splat (i1 true), i32 2) ; CHECK-NEXT: br label %[[BB]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll index 89b87a3a45d12c..7ab5e4d6cb787e 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll @@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) { ; ; POW2-ONLY-LABEL: @store_try_reorder( ; POW2-ONLY-NEXT: entry: -; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4 -; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0 -; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2 -; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4 +; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0 +; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4 +; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 +; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 ; POW2-ONLY-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll index e3b4898e852126..f46a5d84a86cc9 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll @@ -10,8 +10,10 @@ define i32 @foo(ptr nocapture %A, i32 %n) { ; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) 
@bar() ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 9) +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4 ; CHECK-NEXT: ret i32 undef ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll index 7af0c64f187480..889f5a95c81d69 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll @@ -4,17 +4,22 @@ define void @test(ptr %0, ptr %1, ptr %2) { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 4 -; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i32> , [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP8]], [[TMP7]] -; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[TMP5]] -; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> , [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], zeroinitializer -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> -; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP2:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> , [[TMP8]] +; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]] +; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]] +; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer +; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> +; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4 ; CHECK-NEXT: ret void ; %4 = load i32, ptr %1, align 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll index d474a5f2cecae0..c976525b6720eb 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll @@ -4,17 +4,21 @@ define i64 @foo(i32 %tmp7) { ; 
CHECK-LABEL: @foo( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> , i32 [[TMP7:%.*]], i32 3 -; CHECK-NEXT: [[TMP1:%.*]] = sub <8 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> , <8 x i32> [[TMP1]], <8 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw <8 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP3]], [[TMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add <8 x i32> zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP8]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP9]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[TMP5:%.*]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP24:%.*]] = sub i32 undef, 0 +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> , i32 [[TMP24]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 0, i32 5 +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> , i32 [[TMP24]], i32 6 +; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP11]], <8 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]]) +; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP10]], 0 ; CHECK-NEXT: [[TMP64:%.*]] = zext i32 [[OP_RDX]] to i64 ; CHECK-NEXT: ret i64 [[TMP64]] ; @@ -25,7 +29,7 @@ bb: %tmp4 = xor i32 %tmp3, 0 %tmp6 = sub i32 0, 0 %tmp8 = sub i32 %tmp7, 0 - %tmp9 = sub nsw i32 0, poison + %tmp9 = sub nsw i32 0, undef %tmp10 = add nsw i32 0, %tmp6 %tmp11 = sub nsw i32 0, %tmp8 %tmp12 = add i32 0, %tmp10 @@ -40,10 +44,10 @@ bb: %tmp21 = add i32 %tmp20, %tmp17 %tmp22 = sub i32 0, 0 %tmp23 = add i32 0, 0 - %tmp24 = sub i32 poison, 0 - %tmp25 = add nsw i32 %tmp23, poison + %tmp24 = sub i32 undef, 0 + %tmp25 = add nsw i32 %tmp23, undef %tmp26 = add nsw i32 %tmp24, %tmp22 - %tmp27 = sub nsw i32 poison, %tmp24 + %tmp27 = sub nsw i32 undef, %tmp24 %tmp28 = add i32 0, %tmp25 %tmp29 = xor i32 %tmp28, 0 %tmp30 = add i32 0, %tmp26 @@ -54,7 +58,7 @@ bb: %tmp35 = add i32 %tmp34, %tmp29 %tmp36 = add i32 %tmp35, 0 %tmp37 = add i32 %tmp36, %tmp33 - %tmp38 = sub nsw i32 0, poison + %tmp38 = sub nsw i32 0, undef %tmp39 = add i32 0, %tmp38 %tmp40 = xor i32 %tmp39, 0 %tmp41 = add i32 0, %tmp37 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll index fc62b0b38fd535..02c3173adc654f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll @@ -9,10 +9,12 @@ define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x 
i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[SHUFFLE]], -; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 9) -; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[A:%.*]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9) +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0 ; CHECK-NEXT: [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP6]], [[M:%.*]] ; CHECK-NEXT: [[EXTERNALUSE2:%.*]] = mul nsw i32 [[TMP6]], [[M]] ; CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 [[EXTERNALUSE1]], [[EXTERNALUSE2]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll index daab4b6ea4c957..2a5bfa73907704 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll @@ -8,8 +8,10 @@ define i32 @test() { ; CHECK-NEXT: [[TMP10:%.*]] = or i8 [[A_PROMOTED]], 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[A_PROMOTED]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i8> [[TMP1]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i16> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i16> ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll index 94f2c79faa8c93..e6a166c27ac494 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll @@ -9,7 +9,9 @@ define i32 @foo() { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> , i32 [[D]], i32 1 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]] -; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> +; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4 ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll index 15dd6756cd7dbb..6e2a43ac5f9f10 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll @@ -242,18 +242,13 @@ exit: } define void @store_try_reorder(ptr %dst) { -; NON-POW2-LABEL: 
@store_try_reorder( -; NON-POW2-NEXT: entry: -; NON-POW2-NEXT: store <3 x i32> zeroinitializer, ptr [[DST:%.*]], align 4 -; NON-POW2-NEXT: ret void -; -; POW2-ONLY-LABEL: @store_try_reorder( -; POW2-ONLY-NEXT: entry: -; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4 -; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0 -; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2 -; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4 -; POW2-ONLY-NEXT: ret void +; CHECK-LABEL: @store_try_reorder( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ADD:%.*]] = add i32 0, 0 +; CHECK-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4 +; CHECK-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 +; CHECK-NEXT: ret void ; entry: %add = add i32 0, 0 diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll index e4eff0f72b3565..c250029519590f 100644 --- a/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll +++ b/llvm/test/Transforms/SLPVectorizer/alternate-opcode-sindle-bv.ll @@ -1,29 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=X86 %} -; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=AARCH64 %} +; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s %} +; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s %} define <2 x i32> @test(i32 %arg) { -; X86-LABEL: define <2 x i32> @test( -; X86-SAME: i32 [[ARG:%.*]]) { -; X86-NEXT: bb: -; X86-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0 -; X86-NEXT: [[MUL:%.*]] = mul i32 0, 1 -; X86-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]] -; X86-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]] -; X86-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0 -; X86-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1 -; X86-NEXT: ret <2 x i32> [[TMP1]] -; -; AARCH64-LABEL: define <2 x i32> @test( -; AARCH64-SAME: i32 [[ARG:%.*]]) { -; AARCH64-NEXT: bb: -; AARCH64-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[ARG]], i32 0 -; AARCH64-NEXT: [[TMP1:%.*]] = or <2 x i32> [[TMP0]], zeroinitializer -; AARCH64-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 -; AARCH64-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 -; AARCH64-NEXT: [[MUL1:%.*]] = mul i32 [[TMP2]], [[TMP3]] -; AARCH64-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]] -; AARCH64-NEXT: ret <2 x i32> [[TMP1]] +; CHECK-LABEL: define <2 x i32> @test( +; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[OR:%.*]] = or i32 [[ARG]], 0 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1 +; CHECK-NEXT: [[MUL1:%.*]] = mul i32 [[OR]], [[MUL]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 0, [[MUL1]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[OR]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[MUL]], i32 1 +; CHECK-NEXT: ret <2 x i32> [[TMP1]] ; bb: %or = or i32 %arg, 0 @@ -34,3 +23,4 @@ bb: %1 = insertelement <2 x i32> %0, i32 %mul, i32 1 ret <2 x i32> %1 
} + diff --git a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll index 6f9768af38caf6..61a84a67c9ff19 100644 --- a/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll +++ b/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll @@ -5,13 +5,15 @@ define void @func(i32 %0) { ; CHECK-LABEL: define void @func( ; CHECK-SAME: i32 [[TMP0:%.*]]) { ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> , i32 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP0]], 0 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0 -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64 ; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <32 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <32 x i32> [[TMP11]], <32 x i32> , <32 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <32 x i32> [[TMP12]], i32 0, i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP13]], <8 x i32> zeroinitializer, i64 16) @@ -22,61 +24,61 @@ define void @func(i32 %0) { ; CHECK-NEXT: [[TMP19:%.*]] = sext <32 x i32> [[TMP18]] to <32 x i64> ; CHECK-NEXT: [[TMP20:%.*]] = icmp slt <32 x i64> [[TMP19]], zeroinitializer ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <32 x i1> [[TMP20]], i32 31 -; CHECK-NEXT: [[TMP76:%.*]] = and i1 false, [[TMP21]] +; CHECK-NEXT: [[TMP22:%.*]] = and i1 false, [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i1> [[TMP20]], i32 30 -; CHECK-NEXT: [[TMP22:%.*]] = and i1 false, [[TMP23]] +; CHECK-NEXT: [[TMP24:%.*]] = and i1 false, [[TMP23]] ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i1> [[TMP20]], i32 29 -; CHECK-NEXT: [[TMP24:%.*]] = and i1 false, [[TMP25]] +; CHECK-NEXT: [[TMP26:%.*]] = and i1 false, [[TMP25]] ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <32 x i1> [[TMP20]], i32 28 -; CHECK-NEXT: [[TMP26:%.*]] = and i1 false, [[TMP27]] +; CHECK-NEXT: [[TMP28:%.*]] = and i1 false, [[TMP27]] ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i1> [[TMP20]], i32 27 -; CHECK-NEXT: [[TMP28:%.*]] = and i1 false, [[TMP29]] +; CHECK-NEXT: [[TMP30:%.*]] = and i1 false, [[TMP29]] ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i1> [[TMP20]], i32 26 -; CHECK-NEXT: [[TMP30:%.*]] = and i1 false, [[TMP31]] +; CHECK-NEXT: [[TMP32:%.*]] = and i1 false, [[TMP31]] ; CHECK-NEXT: [[TMP33:%.*]] = extractelement <32 x i1> [[TMP20]], i32 25 -; CHECK-NEXT: [[TMP32:%.*]] = and i1 false, [[TMP33]] +; CHECK-NEXT: [[TMP34:%.*]] = and i1 false, [[TMP33]] ; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i1> [[TMP20]], i32 24 -; CHECK-NEXT: [[TMP34:%.*]] = and i1 false, [[TMP35]] +; CHECK-NEXT: [[TMP36:%.*]] = and i1 false, [[TMP35]] ; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i1> [[TMP20]], i32 23 -; CHECK-NEXT: [[TMP36:%.*]] = and i1 false, [[TMP37]] +; CHECK-NEXT: [[TMP38:%.*]] = and i1 false, [[TMP37]] ; CHECK-NEXT: [[TMP39:%.*]] = extractelement <32 x 
i1> [[TMP20]], i32 22 -; CHECK-NEXT: [[TMP38:%.*]] = and i1 false, [[TMP39]] +; CHECK-NEXT: [[TMP40:%.*]] = and i1 false, [[TMP39]] ; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i1> [[TMP20]], i32 21 -; CHECK-NEXT: [[TMP40:%.*]] = and i1 false, [[TMP41]] +; CHECK-NEXT: [[TMP42:%.*]] = and i1 false, [[TMP41]] ; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i1> [[TMP20]], i32 20 -; CHECK-NEXT: [[TMP42:%.*]] = and i1 false, [[TMP43]] +; CHECK-NEXT: [[TMP44:%.*]] = and i1 false, [[TMP43]] ; CHECK-NEXT: [[TMP45:%.*]] = extractelement <32 x i1> [[TMP20]], i32 19 -; CHECK-NEXT: [[TMP44:%.*]] = and i1 false, [[TMP45]] +; CHECK-NEXT: [[TMP46:%.*]] = and i1 false, [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i1> [[TMP20]], i32 18 -; CHECK-NEXT: [[TMP46:%.*]] = and i1 false, [[TMP47]] +; CHECK-NEXT: [[TMP48:%.*]] = and i1 false, [[TMP47]] ; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i1> [[TMP20]], i32 17 -; CHECK-NEXT: [[TMP48:%.*]] = and i1 false, [[TMP49]] +; CHECK-NEXT: [[TMP50:%.*]] = and i1 false, [[TMP49]] ; CHECK-NEXT: [[TMP51:%.*]] = extractelement <32 x i1> [[TMP20]], i32 16 -; CHECK-NEXT: [[TMP50:%.*]] = and i1 false, [[TMP51]] +; CHECK-NEXT: [[TMP52:%.*]] = and i1 false, [[TMP51]] ; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i1> [[TMP20]], i32 15 -; CHECK-NEXT: [[TMP52:%.*]] = and i1 false, [[TMP53]] +; CHECK-NEXT: [[TMP54:%.*]] = and i1 false, [[TMP53]] ; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i1> [[TMP20]], i32 14 -; CHECK-NEXT: [[TMP54:%.*]] = and i1 false, [[TMP55]] +; CHECK-NEXT: [[TMP56:%.*]] = and i1 false, [[TMP55]] ; CHECK-NEXT: [[TMP57:%.*]] = extractelement <32 x i1> [[TMP20]], i32 13 -; CHECK-NEXT: [[TMP56:%.*]] = and i1 false, [[TMP57]] +; CHECK-NEXT: [[TMP58:%.*]] = and i1 false, [[TMP57]] ; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i1> [[TMP20]], i32 12 -; CHECK-NEXT: [[TMP58:%.*]] = and i1 false, [[TMP59]] +; CHECK-NEXT: [[TMP60:%.*]] = and i1 false, [[TMP59]] ; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i1> [[TMP20]], i32 11 -; CHECK-NEXT: [[TMP60:%.*]] = and i1 false, [[TMP61]] +; CHECK-NEXT: [[TMP62:%.*]] = and i1 false, [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = extractelement <32 x i1> [[TMP20]], i32 10 -; CHECK-NEXT: [[TMP62:%.*]] = and i1 false, [[TMP63]] +; CHECK-NEXT: [[TMP64:%.*]] = and i1 false, [[TMP63]] ; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i1> [[TMP20]], i32 9 -; CHECK-NEXT: [[TMP64:%.*]] = and i1 false, [[TMP65]] +; CHECK-NEXT: [[TMP66:%.*]] = and i1 false, [[TMP65]] ; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i1> [[TMP20]], i32 8 -; CHECK-NEXT: [[TMP66:%.*]] = and i1 false, [[TMP67]] +; CHECK-NEXT: [[TMP68:%.*]] = and i1 false, [[TMP67]] ; CHECK-NEXT: [[TMP69:%.*]] = extractelement <32 x i1> [[TMP20]], i32 7 -; CHECK-NEXT: [[TMP68:%.*]] = and i1 false, [[TMP69]] +; CHECK-NEXT: [[TMP70:%.*]] = and i1 false, [[TMP69]] ; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i1> [[TMP20]], i32 6 -; CHECK-NEXT: [[TMP70:%.*]] = and i1 false, [[TMP71]] +; CHECK-NEXT: [[TMP72:%.*]] = and i1 false, [[TMP71]] ; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i1> [[TMP20]], i32 5 -; CHECK-NEXT: [[TMP72:%.*]] = and i1 false, [[TMP73]] +; CHECK-NEXT: [[TMP74:%.*]] = and i1 false, [[TMP73]] ; CHECK-NEXT: [[TMP75:%.*]] = extractelement <32 x i1> [[TMP20]], i32 4 -; CHECK-NEXT: [[TMP74:%.*]] = and i1 false, [[TMP75]] +; CHECK-NEXT: [[TMP76:%.*]] = and i1 false, [[TMP75]] ; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i32> [[TMP18]], i32 0 ; CHECK-NEXT: [[TMP78:%.*]] = zext i32 [[TMP77]] to i64 ; 
CHECK-NEXT: [[TMP79:%.*]] = getelementptr float, ptr addrspace(1) null, i64 [[TMP78]] diff --git a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll index 1e3255f2187af0..732b50396a460d 100644 --- a/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll +++ b/llvm/test/Transforms/SLPVectorizer/shuffle-mask-resized.ll @@ -12,7 +12,9 @@ define i32 @test() { ; CHECK-NEXT: br i1 false, label [[BB4:%.*]], label [[BB3]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> , <2 x i32> -; CHECK-NEXT: [[TMP5]] = or <2 x i32> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP5]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb4: ; CHECK-NEXT: [[TMP6:%.*]] = phi <8 x i32> [ [[TMP1]], [[BB1]] ]