diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 4a89f7dd8672e..9b727a7998392 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8369,6 +8369,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, std::unique_ptr(VPlan0->duplicate()), SubRange, &LVer)) { // Now optimize the initial VPlan. VPlanTransforms::hoistPredicatedLoads(*Plan, *PSE.getSE(), OrigLoop); + VPlanTransforms::sinkPredicatedStores(*Plan, *PSE.getSE(), OrigLoop); VPlanTransforms::runPass(VPlanTransforms::truncateToMinimalBitwidths, *Plan, CM.getMinimalBitwidths()); VPlanTransforms::runPass(VPlanTransforms::optimize, *Plan); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 827dd4b6439ae..38024aa6897fc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -139,35 +139,51 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( return true; } -// Check if a load can be hoisted by verifying it doesn't alias with any stores -// in blocks between FirstBB and LastBB using scoped noalias metadata. -static bool canHoistLoadWithNoAliasCheck(VPReplicateRecipe *Load, - VPBasicBlock *FirstBB, - VPBasicBlock *LastBB) { - // Get the load's memory location and check if it aliases with any stores - // using scoped noalias metadata. - auto LoadLoc = vputils::getMemoryLocation(*Load); - if (!LoadLoc || !LoadLoc->AATags.Scope) +// Check if a memory operation doesn't alias with memory operations in blocks +// between FirstBB and LastBB using scoped noalias metadata. +// For load hoisting, we only check writes in one direction. +// For store sinking, we check both reads and writes bidirectionally. +static bool canHoistOrSinkWithNoAliasCheck( + const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB, + bool CheckReads, + const SmallPtrSetImpl *ExcludeRecipes = nullptr) { + if (!MemLoc.AATags.Scope) return false; - const AAMDNodes &LoadAA = LoadLoc->AATags; + const AAMDNodes &MemAA = MemLoc.AATags; + for (VPBlockBase *Block = FirstBB; Block; Block = Block->getSingleSuccessor()) { - // This function assumes a simple linear chain of blocks. If there are - // multiple successors, we would need more complex analysis. assert(Block->getNumSuccessors() <= 1 && "Expected at most one successor in block chain"); auto *VPBB = cast(Block); for (VPRecipeBase &R : *VPBB) { - if (R.mayWriteToMemory()) { - auto Loc = vputils::getMemoryLocation(R); - // Bail out if we can't get the location or if the scoped noalias - // metadata indicates potential aliasing. - if (!Loc || ScopedNoAliasAAResult::mayAliasInScopes( - LoadAA.Scope, Loc->AATags.NoAlias)) - return false; - } + if (ExcludeRecipes && ExcludeRecipes->contains(&R)) + continue; + + // Skip recipes that don't need checking. + if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory())) + continue; + + auto Loc = vputils::getMemoryLocation(R); + if (!Loc) + // Conservatively assume aliasing for memory operations without + // location. + return false; + + // For reads, check if they don't alias in the reverse direction and + // skip if so. + if (CheckReads && R.mayReadFromMemory() && + !ScopedNoAliasAAResult::mayAliasInScopes(Loc->AATags.Scope, + MemAA.NoAlias)) + continue; + + // Check if the memory operations may alias in the forward direction. + if (ScopedNoAliasAAResult::mayAliasInScopes(MemAA.Scope, + Loc->AATags.NoAlias)) + return false; } + if (Block == LastBB) break; } @@ -4135,119 +4151,217 @@ void VPlanTransforms::hoistInvariantLoads(VPlan &Plan) { } } -// Returns the intersection of metadata from a group of loads. -static VPIRMetadata getCommonLoadMetadata(ArrayRef Loads) { - VPIRMetadata CommonMetadata = *Loads.front(); - for (VPReplicateRecipe *Load : drop_begin(Loads)) - CommonMetadata.intersect(*Load); +// Collect common metadata from a group of replicate recipes by intersecting +// metadata from all recipes in the group. +static VPIRMetadata getCommonMetadata(ArrayRef Recipes) { + VPIRMetadata CommonMetadata = *Recipes.front(); + for (VPReplicateRecipe *Recipe : drop_begin(Recipes)) + CommonMetadata.intersect(*Recipe); return CommonMetadata; } -void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, - const Loop *L) { +template +static SmallVector> +collectComplementaryPredicatedMemOps(VPlan &Plan, ScalarEvolution &SE, + const Loop *L) { + static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store, + "Only Load and Store opcodes supported"); + constexpr bool IsLoad = (Opcode == Instruction::Load); VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); VPTypeAnalysis TypeInfo(Plan); - VPDominatorTree VPDT(Plan); - // Group predicated loads by their address SCEV. - DenseMap> LoadsByAddress; + // Group predicated operations by their address SCEV. + DenseMap> RecipesByAddress; for (VPBlockBase *Block : vp_depth_first_shallow(LoopRegion->getEntry())) { auto *VPBB = cast(Block); for (VPRecipeBase &R : *VPBB) { auto *RepR = dyn_cast(&R); - if (!RepR || RepR->getOpcode() != Instruction::Load || - !RepR->isPredicated()) + if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated()) continue; - VPValue *Addr = RepR->getOperand(0); + // For loads, operand 0 is address; for stores, operand 1 is address. + VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1); const SCEV *AddrSCEV = vputils::getSCEVExprForVPValue(Addr, SE, L); if (!isa(AddrSCEV)) - LoadsByAddress[AddrSCEV].push_back(RepR); + RecipesByAddress[AddrSCEV].push_back(RepR); } } - // For each address, collect loads with complementary masks, sort by - // dominance, and use the earliest load. - for (auto &[Addr, Loads] : LoadsByAddress) { - if (Loads.size() < 2) + // For each address, collect operations with the same or complementary masks. + SmallVector> AllGroups; + auto GetLoadStoreValueType = [&](VPReplicateRecipe *Recipe) { + return TypeInfo.inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0)); + }; + for (auto &[Addr, Recipes] : RecipesByAddress) { + if (Recipes.size() < 2) continue; - // Collect groups of loads with complementary masks. - SmallVector> LoadGroups; - for (VPReplicateRecipe *&LoadI : Loads) { - if (!LoadI) + // Collect groups with the same or complementary masks. + for (VPReplicateRecipe *&RecipeI : Recipes) { + if (!RecipeI) continue; - VPValue *MaskI = LoadI->getMask(); - Type *TypeI = TypeInfo.inferScalarType(LoadI); + VPValue *MaskI = RecipeI->getMask(); + Type *TypeI = GetLoadStoreValueType(RecipeI); SmallVector Group; - Group.push_back(LoadI); - LoadI = nullptr; + Group.push_back(RecipeI); + RecipeI = nullptr; - // Find all loads with the same type. - for (VPReplicateRecipe *&LoadJ : Loads) { - if (!LoadJ) + // Find all operations with the same or complementary masks. + bool HasComplementaryMask = false; + for (VPReplicateRecipe *&RecipeJ : Recipes) { + if (!RecipeJ) continue; - Type *TypeJ = TypeInfo.inferScalarType(LoadJ); + VPValue *MaskJ = RecipeJ->getMask(); + Type *TypeJ = GetLoadStoreValueType(RecipeJ); if (TypeI == TypeJ) { - Group.push_back(LoadJ); - LoadJ = nullptr; + // Check if any operation in the group has a complementary mask with + // another, that is M1 == NOT(M2) or M2 == NOT(M1). + HasComplementaryMask |= match(MaskI, m_Not(m_Specific(MaskJ))) || + match(MaskJ, m_Not(m_Specific(MaskI))); + Group.push_back(RecipeJ); + RecipeJ = nullptr; } } - // Check if any load in the group has a complementary mask with another, - // that is M1 == NOT(M2) or M2 == NOT(M1). - bool HasComplementaryMask = - any_of(drop_begin(Group), [MaskI](VPReplicateRecipe *Load) { - VPValue *MaskJ = Load->getMask(); - return match(MaskI, m_Not(m_Specific(MaskJ))) || - match(MaskJ, m_Not(m_Specific(MaskI))); - }); + if (HasComplementaryMask) { + assert(Group.size() >= 2 && "must have at least 2 entries"); + AllGroups.push_back(std::move(Group)); + } + } + } + + return AllGroups; +} + +// Find the recipe with minimum alignment in the group. +template +static VPReplicateRecipe * +findRecipeWithMinAlign(ArrayRef Group) { + return *min_element(Group, [](VPReplicateRecipe *A, VPReplicateRecipe *B) { + return cast(A->getUnderlyingInstr())->getAlign() < + cast(B->getUnderlyingInstr())->getAlign(); + }); +} + +void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, + const Loop *L) { + auto Groups = + collectComplementaryPredicatedMemOps(Plan, SE, L); + if (Groups.empty()) + return; + + VPDominatorTree VPDT(Plan); - if (HasComplementaryMask) - LoadGroups.push_back(std::move(Group)); + // Process each group of loads. + for (auto &Group : Groups) { + // Sort loads by dominance order, with earliest (most dominating) first. + sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { + return VPDT.properlyDominates(A, B); + }); + + // Try to use the earliest (most dominating) load to replace all others. + VPReplicateRecipe *EarliestLoad = Group[0]; + VPBasicBlock *FirstBB = EarliestLoad->getParent(); + VPBasicBlock *LastBB = Group.back()->getParent(); + + // Check that the load doesn't alias with stores between first and last. + auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad); + if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB, + /*CheckReads=*/false)) + continue; + + // Collect common metadata from all loads in the group. + VPIRMetadata CommonMetadata = getCommonMetadata(Group); + + // Find the load with minimum alignment to use. + auto *LoadWithMinAlign = findRecipeWithMinAlign(Group); + + // Create an unpredicated version of the earliest load with common + // metadata. + auto *UnpredicatedLoad = new VPReplicateRecipe( + LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)}, + /*IsSingleScalar=*/false, /*Mask=*/nullptr, *EarliestLoad, + CommonMetadata); + + UnpredicatedLoad->insertBefore(EarliestLoad); + + // Replace all loads in the group with the unpredicated load. + for (VPReplicateRecipe *Load : Group) { + Load->replaceAllUsesWith(UnpredicatedLoad); + Load->eraseFromParent(); } + } +} - // For each group, check memory dependencies and hoist the earliest load. - for (auto &Group : LoadGroups) { - // Sort loads by dominance order, with earliest (most dominating) first. - sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { - return VPDT.properlyDominates(A, B); - }); +static bool +canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink) { + auto StoreLoc = vputils::getMemoryLocation(*StoresToSink.front()); + if (!StoreLoc || !StoreLoc->AATags.Scope) + return false; - VPReplicateRecipe *EarliestLoad = Group.front(); - VPBasicBlock *FirstBB = EarliestLoad->getParent(); - VPBasicBlock *LastBB = Group.back()->getParent(); + // When sinking a group of stores, all members of the group alias each other. + // Skip them during the alias checks. + SmallPtrSet StoresToSinkSet(StoresToSink.begin(), + StoresToSink.end()); - // Check that the load doesn't alias with stores between first and last. - if (!canHoistLoadWithNoAliasCheck(EarliestLoad, FirstBB, LastBB)) - continue; + VPBasicBlock *FirstBB = StoresToSink.front()->getParent(); + VPBasicBlock *LastBB = StoresToSink.back()->getParent(); + return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, + /*CheckReads=*/true, &StoresToSinkSet); +} - // Find the load with minimum alignment to use. - auto *LoadWithMinAlign = - *min_element(Group, [](VPReplicateRecipe *A, VPReplicateRecipe *B) { - return cast(A->getUnderlyingInstr())->getAlign() < - cast(B->getUnderlyingInstr())->getAlign(); - }); +void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, + const Loop *L) { + auto Groups = + collectComplementaryPredicatedMemOps(Plan, SE, L); + if (Groups.empty()) + return; - // Collect common metadata from all loads in the group. - VPIRMetadata CommonMetadata = getCommonLoadMetadata(Group); - - // Create an unpredicated load with minimum alignment using the earliest - // dominating address and common metadata. - auto *UnpredicatedLoad = new VPReplicateRecipe( - LoadWithMinAlign->getUnderlyingInstr(), EarliestLoad->getOperand(0), - /*IsSingleScalar=*/false, /*Mask=*/nullptr, /*Flags=*/{}, - CommonMetadata); - UnpredicatedLoad->insertBefore(EarliestLoad); - - // Replace all loads in the group with the unpredicated load. - for (VPReplicateRecipe *Load : Group) { - Load->replaceAllUsesWith(UnpredicatedLoad); - Load->eraseFromParent(); - } + VPDominatorTree VPDT(Plan); + + for (auto &Group : Groups) { + sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { + return VPDT.properlyDominates(A, B); + }); + + if (!canSinkStoreWithNoAliasCheck(Group)) + continue; + + // Use the last (most dominated) store's location for the unconditional + // store. + VPReplicateRecipe *LastStore = Group.back(); + VPBasicBlock *InsertBB = LastStore->getParent(); + + // Collect common alias metadata from all stores in the group. + VPIRMetadata CommonMetadata = getCommonMetadata(Group); + + // Build select chain for stored values. + VPValue *SelectedValue = Group[0]->getOperand(0); + VPBuilder Builder(InsertBB, LastStore->getIterator()); + + for (unsigned I = 1; I < Group.size(); ++I) { + VPValue *Mask = Group[I]->getMask(); + VPValue *Value = Group[I]->getOperand(0); + SelectedValue = Builder.createSelect(Mask, Value, SelectedValue, + Group[I]->getDebugLoc()); } + + // Find the store with minimum alignment to use. + auto *StoreWithMinAlign = findRecipeWithMinAlign(Group); + + // Create unconditional store with selected value and common metadata. + auto *UnpredicatedStore = + new VPReplicateRecipe(StoreWithMinAlign->getUnderlyingInstr(), + {SelectedValue, LastStore->getOperand(1)}, + /*IsSingleScalar=*/false, + /*Mask=*/nullptr, *LastStore, CommonMetadata); + UnpredicatedStore->insertBefore(*InsertBB, LastStore->getIterator()); + + // Remove all predicated stores from the group. + for (VPReplicateRecipe *Store : Group) + Store->eraseFromParent(); } } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index ae3797dee1f07..afdf1655b4622 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -325,6 +325,13 @@ struct VPlanTransforms { static void hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, const Loop *L); + /// Sink predicated stores to the same address with complementary predicates + /// (P and NOT P) to an unconditional store with select recipes for the + /// stored values. This eliminates branching overhead when all paths + /// unconditionally store to the same location. + static void sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, + const Loop *L); + // Materialize vector trip counts for constants early if it can simply be // computed as (Original TC / VF * UF) * VF * UF. static void diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index 87942911e915f..ae772da8862b3 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -21,13 +21,12 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META3:![0-9]+]] @@ -35,39 +34,14 @@ define void @test_stores_noalias_via_rt_checks_after_loads(ptr %dst, ptr %src, p ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP16]], i32 1 ; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP17]], splat (i32 5) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10) ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] -; CHECK: [[PRED_STORE_IF6]]: ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 -; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; CHECK: [[PRED_STORE_CONTINUE7]]: -; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP17]], splat (i32 10) -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] -; CHECK: [[PRED_STORE_IF8]]: -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0 -; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_CONTINUE9]]: -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] -; CHECK: [[PRED_STORE_IF10]]: -; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1 -; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META5]], !noalias [[META7]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] -; CHECK: [[PRED_STORE_CONTINUE11]]: +; CHECK-NEXT: [[TMP14:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[TMP14]], i32 0 +; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP21]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP14]], i32 1 +; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP24]], align 4, !alias.scope [[META5]], !noalias [[META7]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -134,7 +108,7 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE21:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] @@ -162,57 +136,32 @@ define void @test_aliasing_store(ptr %dst, ptr %src, ptr %cond) { ; CHECK: [[PRED_LOAD_CONTINUE11]]: ; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF10]] ] ; CHECK-NEXT: [[TMP19:%.*]] = sub <2 x i32> [[TMP18]], splat (i32 5) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META12]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] -; CHECK: [[PRED_STORE_IF12]]: -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 -; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META19]], !noalias [[META12]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]] -; CHECK: [[PRED_STORE_CONTINUE13]]: ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15:.*]] -; CHECK: [[PRED_LOAD_IF14]]: +; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] +; CHECK: [[PRED_LOAD_IF12]]: ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META15]], !noalias [[META17]] ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> poison, i32 [[TMP28]], i32 0 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]] -; CHECK: [[PRED_LOAD_CONTINUE15]]: -; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_STORE_CONTINUE13]] ], [ [[TMP29]], %[[PRED_LOAD_IF14]] ] +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]] +; CHECK: [[PRED_LOAD_CONTINUE13]]: +; CHECK-NEXT: [[TMP30:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP29]], %[[PRED_LOAD_IF12]] ] ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF16:.*]], label %[[PRED_LOAD_CONTINUE17:.*]] -; CHECK: [[PRED_LOAD_IF16]]: +; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]] +; CHECK: [[PRED_LOAD_IF14]]: ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4, !alias.scope [[META15]], !noalias [[META17]] ; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> [[TMP30]], i32 [[TMP33]], i32 1 -; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE17]] -; CHECK: [[PRED_LOAD_CONTINUE17]]: -; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE15]] ], [ [[TMP34]], %[[PRED_LOAD_IF16]] ] +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]] +; CHECK: [[PRED_LOAD_CONTINUE15]]: +; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ [[TMP30]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP34]], %[[PRED_LOAD_IF14]] ] ; CHECK-NEXT: [[TMP36:%.*]] = add <2 x i32> [[TMP35]], splat (i32 10) -; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] -; CHECK: [[PRED_STORE_IF18]]: -; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP36]], i32 0 -; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP38]], align 4, !alias.scope [[META19]], !noalias [[META12]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE19]] -; CHECK: [[PRED_STORE_CONTINUE19]]: -; CHECK-NEXT: [[TMP40:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21]] -; CHECK: [[PRED_STORE_IF20]]: +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i32> [[TMP36]], i32 1 -; CHECK-NEXT: store i32 [[TMP42]], ptr [[TMP41]], align 4, !alias.scope [[META19]], !noalias [[META12]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE21]] -; CHECK: [[PRED_STORE_CONTINUE21]]: +; CHECK-NEXT: [[TMP37:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP36]], <2 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x i32> [[TMP37]], i32 0 +; CHECK-NEXT: store i32 [[TMP38]], ptr [[TMP40]], align 4, !alias.scope [[META19:![0-9]+]], !noalias [[META12]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i32> [[TMP37]], i32 1 +; CHECK-NEXT: store i32 [[TMP39]], ptr [[TMP41]], align 4, !alias.scope [[META19]], !noalias [[META12]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP43]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -289,13 +238,12 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -304,7 +252,7 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20]] ; CHECK: [[PRED_STORE_IF19]]: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: store i32 10, ptr [[TMP16]], align 4, !alias.scope [[META25]], !noalias [[META27]] @@ -317,39 +265,14 @@ define void @test_noalias_store_via_runtime_checks(ptr %dst, ptr %dst.1, ptr %sr ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 ; CHECK-NEXT: [[TMP21:%.*]] = sub <2 x i32> [[TMP19]], splat (i32 5) -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] -; CHECK: [[PRED_STORE_IF21]]: +; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] -; CHECK: [[PRED_STORE_IF23]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 -; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] -; CHECK: [[PRED_STORE_CONTINUE24]]: -; CHECK-NEXT: [[TMP38:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) -; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] -; CHECK: [[PRED_STORE_IF25]]: -; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x i32> [[TMP38]], i32 0 -; CHECK-NEXT: store i32 [[TMP41]], ptr [[TMP40]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] -; CHECK: [[PRED_STORE_CONTINUE26]]: -; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_IF27]]: -; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i32> [[TMP38]], i32 1 -; CHECK-NEXT: store i32 [[TMP44]], ptr [[TMP43]], align 4, !alias.scope [[META31]], !noalias [[META32]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_CONTINUE28]]: +; CHECK-NEXT: [[TMP22:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP38]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP22]], i32 0 +; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META32:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i32> [[TMP22]], i32 1 +; CHECK-NEXT: store i32 [[TMP20]], ptr [[TMP26]], align 4, !alias.scope [[META31]], !noalias [[META32]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP45:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP45]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] @@ -418,7 +341,7 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE17:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE15:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] @@ -446,40 +369,31 @@ define void @test_memory_op_between_loads_alias(ptr %dst, ptr %src, ptr %cond, p ; CHECK: [[PRED_LOAD_CONTINUE11]]: ; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF10]] ] ; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP18]], splat (i32 10) -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP19]], i32 0 -; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP21]], align 4, !alias.scope [[META42:![0-9]+]], !noalias [[META35]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF12:.*]], label %[[PRED_STORE_CONTINUE13:.*]] -; CHECK: [[PRED_STORE_IF12]]: -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i32> [[TMP19]], i32 1 -; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP24]], align 4, !alias.scope [[META42]], !noalias [[META35]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE13]] -; CHECK: [[PRED_STORE_CONTINUE13]]: ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] -; CHECK: [[PRED_STORE_IF14]]: +; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]] +; CHECK: [[PRED_LOAD_IF12]]: ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP32]], ptr [[TMP29]], align 4, !alias.scope [[META42]], !noalias [[META35]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]] -; CHECK: [[PRED_STORE_CONTINUE15]]: +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP27]], align 4, !alias.scope [[META38]], !noalias [[META40]] +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP20]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]] +; CHECK: [[PRED_LOAD_CONTINUE13]]: +; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP23]], %[[PRED_LOAD_IF12]] ] ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17]] -; CHECK: [[PRED_STORE_IF16]]: +; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]] +; CHECK: [[PRED_LOAD_IF14]]: ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP31]], align 4, !alias.scope [[META38]], !noalias [[META40]] -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP33]], align 4, !alias.scope [[META42]], !noalias [[META35]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]] -; CHECK: [[PRED_STORE_CONTINUE17]]: +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP31]], align 4, !alias.scope [[META38]], !noalias [[META40]] +; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP22]], i32 [[TMP25]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]] +; CHECK: [[PRED_LOAD_CONTINUE15]]: +; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP22]], %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP32]], %[[PRED_LOAD_IF14]] ] +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] +; CHECK-NEXT: [[TMP28:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP33]], <2 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i32> [[TMP28]], i32 0 +; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP36]], align 4, !alias.scope [[META42:![0-9]+]], !noalias [[META35]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i32> [[TMP28]], i32 1 +; CHECK-NEXT: store i32 [[TMP35]], ptr [[TMP37]], align 4, !alias.scope [[META42]], !noalias [[META35]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP34]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] @@ -559,13 +473,12 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE28:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE20:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META45:![0-9]+]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -574,7 +487,7 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]] +; CHECK-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20]] ; CHECK: [[PRED_STORE_IF19]]: ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST_1]], i32 [[TMP5]] ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4, !alias.scope [[META48]], !noalias [[META50]] @@ -587,36 +500,13 @@ define void @test_memory_op_between_loads_no_alias_via_rt_checks(ptr %dst, ptr % ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i32 0 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP18]], i32 1 ; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP19]], splat (i32 10) -; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 -; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]] -; CHECK: [[PRED_STORE_IF21]]: ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 -; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]] -; CHECK: [[PRED_STORE_CONTINUE22]]: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]] -; CHECK: [[PRED_STORE_IF23]]: ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i32> [[TMP21]], i32 1 -; CHECK-NEXT: store i32 [[TMP27]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]] -; CHECK: [[PRED_STORE_CONTINUE24]]: -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]] -; CHECK: [[PRED_STORE_IF25]]: -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP31]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]] -; CHECK: [[PRED_STORE_CONTINUE26]]: -; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_IF27]]: -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP35]], align 4, !alias.scope [[META54]], !noalias [[META55]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]] -; CHECK: [[PRED_STORE_CONTINUE28]]: +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP8]], <2 x i32> [[TMP21]], <2 x i32> [[TMP19]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0 +; CHECK-NEXT: store i32 [[TMP22]], ptr [[TMP23]], align 4, !alias.scope [[META54:![0-9]+]], !noalias [[META55:![0-9]+]] +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[TMP20]], i32 1 +; CHECK-NEXT: store i32 [[TMP24]], ptr [[TMP26]], align 4, !alias.scope [[META54]], !noalias [[META55]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP56:![0-9]+]] @@ -685,45 +575,37 @@ define void @test_stores_not_sunk_due_to_aliasing_load(ptr %dst, ptr %alias, ptr ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE7:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META58:![0-9]+]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) -; CHECK-NEXT: [[TMP7:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true) +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <2 x i32> [[WIDE_LOAD]], splat (i32 11) ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META61:![0-9]+]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 [[TMP15]], ptr [[TMP12]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META65:![0-9]+]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] -; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META61:![0-9]+]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP15]], %[[PRED_LOAD_IF]] ] ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] -; CHECK: [[PRED_STORE_IF6]]: +; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7]] +; CHECK: [[PRED_LOAD_IF6]]: ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[ALIAS]], i32 [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP14]], align 4, !alias.scope [[META61]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP16]], align 4, !alias.scope [[META63]], !noalias [[META65]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; CHECK: [[PRED_STORE_CONTINUE7]]: -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] -; CHECK: [[PRED_STORE_IF8]]: +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP20]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]] +; CHECK: [[PRED_LOAD_CONTINUE7]]: +; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i32> [ [[TMP20]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF6]] ] ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]] -; CHECK-NEXT: store i32 10, ptr [[TMP18]], align 4, !alias.scope [[META63]], !noalias [[META65]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] -; CHECK: [[PRED_STORE_CONTINUE9]]: -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] -; CHECK: [[PRED_STORE_IF10]]: ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP5]] -; CHECK-NEXT: store i32 10, ptr [[TMP19]], align 4, !alias.scope [[META63]], !noalias [[META65]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] -; CHECK: [[PRED_STORE_CONTINUE11]]: +; CHECK-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP22]], <2 x i32> splat (i32 10) +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0 +; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP18]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META65:![0-9]+]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1 +; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP19]], align 4, !alias.scope [[META63]], !noalias [[META65]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP66:![0-9]+]] @@ -1084,3 +966,124 @@ loop.latch: exit: ret void } + +; Test with 3 predicated stores to the same address, but with different +; (non-complementary) predicates. +define void @test_three_stores_with_different_predicates(ptr %dst, ptr %src, ptr %cond) { +; CHECK-LABEL: define void @test_three_stores_with_different_predicates( +; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr [[COND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] +; CHECK: [[VECTOR_MEMCHECK]]: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 400 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND]], i64 400 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE11:.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META85:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11) +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4, !alias.scope [[META88:![0-9]+]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] +; CHECK: [[PRED_STORE_CONTINUE]]: +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] +; CHECK: [[PRED_STORE_IF2]]: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] +; CHECK-NEXT: store i32 1, ptr [[TMP7]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] +; CHECK: [[PRED_STORE_CONTINUE3]]: +; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i1> [[TMP3]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 10) +; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP9]], <2 x i1> [[TMP10]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 +; CHECK-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] +; CHECK: [[PRED_STORE_IF4]]: +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: store i32 2, ptr [[TMP13]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] +; CHECK: [[PRED_STORE_CONTINUE5]]: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 +; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7:.*]] +; CHECK: [[PRED_STORE_IF6]]: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] +; CHECK-NEXT: store i32 2, ptr [[TMP15]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] +; CHECK: [[PRED_STORE_CONTINUE7]]: +; CHECK-NEXT: [[TMP16:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 9) +; CHECK-NEXT: [[TMP17:%.*]] = select <2 x i1> [[TMP9]], <2 x i1> [[TMP16]], <2 x i1> zeroinitializer +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP17]], i32 0 +; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF8:.*]], label %[[PRED_STORE_CONTINUE9:.*]] +; CHECK: [[PRED_STORE_IF8]]: +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]] +; CHECK-NEXT: store i32 3, ptr [[TMP19]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE9]] +; CHECK: [[PRED_STORE_CONTINUE9]]: +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP17]], i32 1 +; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF10:.*]], label %[[PRED_STORE_CONTINUE11]] +; CHECK: [[PRED_STORE_IF10]]: +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP1]] +; CHECK-NEXT: store i32 3, ptr [[TMP21]], align 4, !alias.scope [[META88]], !noalias [[META85]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE11]] +; CHECK: [[PRED_STORE_CONTINUE11]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP90:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br [[EXIT:label %.*]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv + %c = load i32, ptr %gep.cond, align 4 + %c.0 = icmp ule i32 %c, 11 + br i1 %c.0, label %then.0, label %continue.0 + +then.0: + %gep.dst.then.0 = getelementptr inbounds i32, ptr %dst, i32 %iv + store i32 1, ptr %gep.dst.then.0, align 4 + br label %continue.0 + +continue.0: + %c.1 = icmp ule i32 %c, 10 + br i1 %c.1, label %then.1, label %continue.1 + +then.1: + %gep.dst.then.1 = getelementptr inbounds i32, ptr %dst, i32 %iv + store i32 2, ptr %gep.dst.then.1, align 4 + br label %continue.1 + +continue.1: + %c.2 = icmp ule i32 %c, 9 + br i1 %c.2, label %then.2, label %loop.latch + +then.2: + %gep.dst.then.2 = getelementptr inbounds i32, ptr %dst, i32 %iv + store i32 3, ptr %gep.dst.then.2, align 4 + br label %loop.latch + +loop.latch: + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 100 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} +