From 642594ae87aca5181827cf9daff02e851b24ca09 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 4 Apr 2016 07:57:39 +0000 Subject: [PATCH] Exploit graph properties during domain generation As a CFG is often structured we can simplify the steps performed during domain generation. When we push domain information we can utilize the information from a block A to build the domain of a block B, if A dominates B and there is no loop backedge on a path from A to B. When we pull domain information we can use information from a block A to build the domain of a block B if B post-dominates A. This patch implements both ideas and thereby simplifies domains that were not simplified by isl. For the FINAL basic block in test/ScopInfo/complex-successor-structure-3.ll we used to build a universe set with 81 basic sets. Now it actually is represented as universe set. While the initial idea to utilize the graph structure depended on the dominator and post-dominator tree we can use the available region information as a coarse-grained replacement. To this end we push the region entry domain to the region exit and pull it from the region entry for the region exit if applicable. With this patch we now successfully compile External/SPEC/CINT2006/400_perlbench/400_perlbench and SingleSource/Benchmarks/Adobe-C++/loop_unroll.
Differential Revision: http://reviews.llvm.org/D18450 llvm-svn: 265285 --- polly/include/polly/ScopInfo.h | 47 ++ polly/lib/Analysis/ScopInfo.cpp | 161 +++++- polly/test/Isl/CodeGen/phi_scalar_simple_1.ll | 2 +- .../ScopInfo/complex-successor-structure-2.ll | 538 ++++++++++++++++++ .../ScopInfo/complex-successor-structure-3.ll | 365 ++++++++++++ .../ScopInfo/complex-successor-structure.ll | 9 +- .../ScopInfo/intra_and_inter_bb_scalar_dep.ll | 2 +- .../ScopInfo/non-affine-region-with-loop-2.ll | 2 +- polly/test/ScopInfo/non_affine_region_1.ll | 9 +- ...er-used-as-base-pointer-and-scalar-read.ll | 2 +- polly/test/ScopInfo/remarks.ll | 8 +- polly/test/ScopInfo/switch-1.ll | 25 +- polly/test/ScopInfo/switch-2.ll | 15 +- polly/test/ScopInfo/switch-4.ll | 32 +- 14 files changed, 1131 insertions(+), 86 deletions(-) create mode 100644 polly/test/ScopInfo/complex-successor-structure-2.ll create mode 100644 polly/test/ScopInfo/complex-successor-structure-3.ll diff --git a/polly/include/polly/ScopInfo.h b/polly/include/polly/ScopInfo.h index 39efcb48824b..4b7514101b81 100644 --- a/polly/include/polly/ScopInfo.h +++ b/polly/include/polly/ScopInfo.h @@ -1421,6 +1421,53 @@ class Scop { void init(AliasAnalysis &AA, AssumptionCache &AC, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI); + /// @brief Propagate domains that are known due to graph properties. + /// + /// As a CFG is mostly structured we use the graph properties to propagate + /// domains without the need to compute all path conditions. In particular, if + /// a block A dominates a block B and B post-dominates A we know that the + /// domain of B is a superset of the domain of A. As we do not have + /// post-dominator information available here we use the less precise region + /// information. Given a region R, we know that the exit is always executed if + /// the entry was executed, thus the domain of the exit is a superset of the + /// domain of the entry. 
In case the exit can only be reached from within the + /// region the domains are in fact equal. This function will use this property + /// to avoid the generation of condition constraints that determine when a + /// branch is taken. If @p BB is a region entry block we will propagate its + /// domain to the region exit block. Additionally, we put the region exit + /// block in the @p FinishedExitBlocks set so we can later skip edges from + /// within the region to that block. + /// + /// @param BB The block for which the domain is currently propagated. + /// @param BBLoop The innermost affine loop surrounding @p BB. + /// @param FinishedExitBlocks Set of region exits the domain was set for. + /// @param SD The ScopDetection analysis for the current function. + /// @param LI The LoopInfo for the current function. + /// + void propagateDomainConstraintsToRegionExit( + BasicBlock *BB, Loop *BBLoop, + SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, ScopDetection &SD, + LoopInfo &LI); + + /// @brief Compute the union of predecessor domains for @p BB. + /// + /// To compute the union of all domains of predecessors of @p BB this + /// function applies similar reasoning on the CFG structure as described for + /// @see propagateDomainConstraintsToRegionExit + /// + /// @param BB The block for which the predecessor domains are collected. + /// @param Domain The domain under which BB is executed. + /// @param SD The ScopDetection analysis for the current function. + /// @param DT The DominatorTree for the current function. + /// @param LI The LoopInfo for the current function. + /// + /// @returns The domain under which @p BB is executed. + __isl_give isl_set *getPredecessorDomainConstraints(BasicBlock *BB, + isl_set *Domain, + ScopDetection &SD, + DominatorTree &DT, + LoopInfo &LI); + + /// @brief Add loop carried constraints to the header block of the loop @p L. /// /// @param L The loop to process. 
diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index bcb2010bfb92..54ff1af52ee8 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -2237,6 +2237,52 @@ static __isl_give isl_set *adjustDomainDimensions(Scop &S, return Dom; } + +void Scop::propagateDomainConstraintsToRegionExit( + BasicBlock *BB, Loop *BBLoop, + SmallPtrSetImpl<BasicBlock *> &FinishedExitBlocks, ScopDetection &SD, + LoopInfo &LI) { + + // Check if the block @p BB is the entry of a region. If so we propagate its + // domain to the exit block of the region. Otherwise we are done. + auto *RI = R.getRegionInfo(); + auto *BBReg = RI ? RI->getRegionFor(BB) : nullptr; + auto *ExitBB = BBReg ? BBReg->getExit() : nullptr; + if (!BBReg || BBReg->getEntry() != BB || !R.contains(ExitBB)) + return; + + auto &BoxedLoops = *SD.getBoxedLoops(&getRegion()); + // Do not propagate the domain if there is a loop backedge inside the region + // that would prevent the exit block from being executed. + auto *L = BBLoop; + while (L && R.contains(L)) { + SmallVector<BasicBlock *, 4> LatchBBs; + L->getLoopLatches(LatchBBs); + for (auto *LatchBB : LatchBBs) + if (BB != LatchBB && BBReg->contains(LatchBB)) + return; + L = L->getParentLoop(); + } + + auto *Domain = DomainMap[BB]; + assert(Domain && "Cannot propagate a nullptr"); + + auto *ExitBBLoop = getFirstNonBoxedLoopFor(ExitBB, LI, BoxedLoops); + + // Since the dimensions of @p BB and @p ExitBB might be different we have to + // adjust the domain before we can propagate it. + auto *AdjustedDomain = + adjustDomainDimensions(*this, isl_set_copy(Domain), BBLoop, ExitBBLoop); + auto *&ExitDomain = DomainMap[ExitBB]; + + // If the exit domain is not yet created we set it otherwise we "add" the + // current domain. + ExitDomain = + ExitDomain ? 
isl_set_union(AdjustedDomain, ExitDomain) : AdjustedDomain; + + FinishedExitBlocks.insert(ExitBB); +} + bool Scop::buildDomainsWithBranchConstraints(Region *R, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI) { auto &BoxedLoops = *SD.getBoxedLoops(&getRegion()); @@ -2252,6 +2298,7 @@ bool Scop::buildDomainsWithBranchConstraints(Region *R, ScopDetection &SD, // As we are only interested in non-loop carried constraints here we can // simply skip loop back edges. + SmallPtrSet<BasicBlock *, 8> FinishedExitBlocks; ReversePostOrderTraversal<Region *> RTraversal(R); for (auto *RN : RTraversal) { @@ -2279,7 +2326,22 @@ bool Scop::buildDomainsWithBranchConstraints(Region *R, ScopDetection &SD, if (!Domain) continue; - Loop *BBLoop = getRegionNodeLoop(RN, LI); + auto *BBLoop = getRegionNodeLoop(RN, LI); + // Propagate the domain from BB directly to blocks that have a superset + // domain, at the moment only region exit nodes of regions that start in BB. + propagateDomainConstraintsToRegionExit(BB, BBLoop, FinishedExitBlocks, SD, + LI); + + // If all successors of BB have been set a domain through the propagation + // above we do not need to build condition sets but can just skip this + // block. However, it is important to note that this is a local property + // with regards to the region @p R. To this end FinishedExitBlocks is a + // local variable. + auto IsFinishedRegionExit = [&FinishedExitBlocks](BasicBlock *SuccBB) { + return FinishedExitBlocks.count(SuccBB); + }; + if (std::all_of(succ_begin(BB), succ_end(BB), IsFinishedRegionExit)) + continue; // Build the condition sets for the successor nodes of the current region // node. If it is a non-affine subregion we will always execute the single @@ -2300,6 +2362,13 @@ bool Scop::buildDomainsWithBranchConstraints(Region *R, ScopDetection &SD, isl_set *CondSet = ConditionSets[u]; BasicBlock *SuccBB = getRegionNodeSuccessor(RN, TI, u); + // If we propagate the domain of some block to "SuccBB" we do not have to + // adjust the domain. 
+ if (FinishedExitBlocks.count(SuccBB)) { + isl_set_free(CondSet); + continue; + } + // Skip back edges. if (DT.dominates(SuccBB, BB)) { isl_set_free(CondSet); @@ -2352,6 +2421,65 @@ getDomainForBlock(BasicBlock *BB, DenseMap<BasicBlock *, isl_set *> &DomainMap, return getDomainForBlock(R->getEntry(), DomainMap, RI); } +isl_set *Scop::getPredecessorDomainConstraints(BasicBlock *BB, isl_set *Domain, + ScopDetection &SD, + DominatorTree &DT, + LoopInfo &LI) { + // If @p BB is the ScopEntry we are done. + if (R.getEntry() == BB) + return isl_set_universe(isl_set_get_space(Domain)); + + // The set of boxed loops (loops in non-affine subregions) for this SCoP. + auto &BoxedLoops = *SD.getBoxedLoops(&getRegion()); + + // The region info of this function. + auto &RI = *R.getRegionInfo(); + + auto *BBLoop = getFirstNonBoxedLoopFor(BB, LI, BoxedLoops); + + // A domain to collect all predecessor domains, thus all conditions under + // which the block is executed. To this end we start with the empty domain. + isl_set *PredDom = isl_set_empty(isl_set_get_space(Domain)); + + // Set of regions of which the entry block domain has been propagated to BB. + // All predecessors inside any of the regions can be skipped. + SmallSet<Region *, 8> PropagatedRegions; + + for (auto *PredBB : predecessors(BB)) { + // Skip backedges. + if (DT.dominates(BB, PredBB)) + continue; + + // If the predecessor is in a region we used for propagation we can skip it. + auto PredBBInRegion = [PredBB](Region *PR) { return PR->contains(PredBB); }; + if (std::any_of(PropagatedRegions.begin(), PropagatedRegions.end(), + PredBBInRegion)) { + continue; + } + + // Check if there is a valid region we can use for propagation, thus look + // for a region that contains the predecessor and has @p BB as exit block. 
+ auto *PredR = RI.getRegionFor(PredBB); + while (PredR->getExit() != BB && !PredR->contains(BB)) + PredR = PredR->getParent(); + + // If a valid region for propagation was found use the entry of that region + // for propagation, otherwise the PredBB directly. + if (PredR->getExit() == BB) { + PredBB = PredR->getEntry(); + PropagatedRegions.insert(PredR); + } + + auto *PredBBDom = getDomainForBlock(PredBB, DomainMap, RI); + auto *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, BoxedLoops); + PredBBDom = adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop); + + PredDom = isl_set_union(PredDom, PredBBDom); + } + + return PredDom; +} + void Scop::propagateDomainConstraints(Region *R, ScopDetection &SD, DominatorTree &DT, LoopInfo &LI) { // Iterate over the region R and propagate the domain constrains from the @@ -2363,9 +2491,6 @@ void Scop::propagateDomainConstraints(Region *R, ScopDetection &SD, // predecessors have been visited before a block or non-affine subregion is // visited. - // The set of boxed loops (loops in non-affine subregions) for this SCoP. - auto &BoxedLoops = *SD.getBoxedLoops(&getRegion()); - ReversePostOrderTraversal<Region *> RTraversal(R); for (auto *RN : RTraversal) { @@ -2393,34 +2518,12 @@ void Scop::propagateDomainConstraints(Region *R, ScopDetection &SD, continue; } - Loop *BBLoop = getRegionNodeLoop(RN, LI); - - isl_set *PredDom = isl_set_empty(isl_set_get_space(Domain)); - for (auto *PredBB : predecessors(BB)) { - - // Skip backedges - if (DT.dominates(BB, PredBB)) - continue; - - isl_set *PredBBDom = nullptr; - - // Handle the SCoP entry block with its outside predecessors. 
- if (!getRegion().contains(PredBB)) - PredBBDom = isl_set_universe(isl_set_get_space(PredDom)); - - if (!PredBBDom) { - PredBBDom = getDomainForBlock(PredBB, DomainMap, *R->getRegionInfo()); - auto *PredBBLoop = getFirstNonBoxedLoopFor(PredBB, LI, BoxedLoops); - PredBBDom = - adjustDomainDimensions(*this, PredBBDom, PredBBLoop, BBLoop); - } - - PredDom = isl_set_union(PredDom, PredBBDom); - } - // Under the union of all predecessor conditions we can reach this block. + auto *PredDom = getPredecessorDomainConstraints(BB, Domain, SD, DT, LI); Domain = isl_set_coalesce(isl_set_intersect(Domain, PredDom)); + Domain = isl_set_align_params(Domain, getParamSpace()); + Loop *BBLoop = getRegionNodeLoop(RN, LI); if (BBLoop && BBLoop->getHeader() == BB && getRegion().contains(BBLoop)) addLoopBoundsToHeaderDomain(BBLoop, LI); diff --git a/polly/test/Isl/CodeGen/phi_scalar_simple_1.ll b/polly/test/Isl/CodeGen/phi_scalar_simple_1.ll index c39b24f7caf8..895d32902ddc 100644 --- a/polly/test/Isl/CodeGen/phi_scalar_simple_1.ll +++ b/polly/test/Isl/CodeGen/phi_scalar_simple_1.ll @@ -28,7 +28,7 @@ entry: ; CHECK-LABEL: polly.start: ; CHECK: store i32 %x, i32* %x.addr.0.phiops -; CHECK-LABEL: polly.merge: +; CHECK-LABEL: polly.exiting: ; CHECK: %x.addr.0.final_reload = load i32, i32* %x.addr.0.s2a for.cond: ; preds = %for.inc4, %entry diff --git a/polly/test/ScopInfo/complex-successor-structure-2.ll b/polly/test/ScopInfo/complex-successor-structure-2.ll new file mode 100644 index 000000000000..beb308c370e4 --- /dev/null +++ b/polly/test/ScopInfo/complex-successor-structure-2.ll @@ -0,0 +1,538 @@ +; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" -polly-scops \ +; RUN: < %s 2>&1 | FileCheck %s + +; We build a scop for the region for.body->B13. The CFG is of the following +; form and the branch conditions are build from "smax" SCEVs. However, in +; contrast to complex-success-structure.ll the smax constraints do not grow +; anymore after B4. 
This will keep the condition construction bounded. +; Since we propagate the domains from one B(X) to the B(X+1) we can also keep +; the domains simple. We will bail anyway due to invalid required invariant +; loads. +; +; CHECK-NOT: Low complexity assumption +; +; | +; for.body <--+ +; | | +; |---------+ +; | +; \ / +; if.entry --+ +; | | +; A0 | +; | | +; B0 <-----+ +; | \ +; | \ +; A1 \ +; | | +; | | +; B1<--+ +; | \ +; | \ +; A2 \ +; | | +; | | +; B2<--+ +; | \ +; | \ +; A3 \ +; | | +; | | +; B3<--+ +; | \ +; | \ +; A4 \ +; | | +; | | +; B4<--+ +; | \ +; | \ +; A5 \ +; | | +; | | +; B5<--+ +; | \ +; | \ +; A6 \ +; | | +; | | +; B6<--+ +; | \ +; | \ +; A7 \ +; | | +; | | +; B7<--+ +; | \ +; | \ +; A8 \ +; | | +; | | +; B8<--+ +; | \ +; | \ +; A9 \ +; | | +; | | +; B9<--+ +; | \ +; | \ +; A10 \ +; | | +; | | +; B10<-+ +; | \ +; | \ +; A11 \ +; | | +; | | +; B11<-+ +; | \ +; | \ +; A12 \ +; | | +; | | +; B12<-+ +; | \ +; | \ +; A13 \ +; | | +; | | +; B13<-+ + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64" +target triple = "thumbv7--linux-android" + +@Table1 = external global [2304 x i16], align 2 +@Table2 = external global [1792 x i16], align 2 +@Table3 = external global [16 x i16], align 2 + +define void @foo(i16* nocapture readonly %indice, i16* nocapture %Output, i16* nocapture readonly %In1, i16* nocapture readonly %In2, i16 signext %var, i16 signext %var2) { +entry: + %.reg2mem158 = alloca i16 + %.reg2mem156 = alloca i16 + %.reg2mem154 = alloca i16 + %.reg2mem152 = alloca i16 + %.reg2mem150 = alloca i16 + %.reg2mem = alloca i16 + %Temp_Ref = alloca [16 x i16], align 2 + %0 = bitcast [16 x i16]* %Temp_Ref to i8* + %cmp = icmp eq i16 %var, 0 + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.2138 = phi i32 [ %inc47, %for.body ], [ 0, %entry ] + %arrayidx28 = getelementptr inbounds [16 x i16], [16 x i16]* @Table3, i32 0, i32 %i.2138 + %1 = load i16, i16* %arrayidx28, align 2 + %conv29 = sext i16 %1 to i32 + 
%arrayidx36 = getelementptr inbounds i16, i16* %In2, i32 %i.2138 + %2 = load i16, i16* %arrayidx36, align 2 + %conv37 = sext i16 %2 to i32 + %shl38147 = add nsw i32 %conv37, %conv29 + %add35.1 = add nuw nsw i32 %i.2138, 16 + %arrayidx36.1 = getelementptr inbounds i16, i16* %In2, i32 %add35.1 + %3 = load i16, i16* %arrayidx36.1, align 2 + %conv37.1 = sext i16 %3 to i32 + %shl38.1148 = add nsw i32 %conv37.1, %shl38147 + %add35.2 = add nuw nsw i32 %i.2138, 32 + %arrayidx36.2 = getelementptr inbounds i16, i16* %In2, i32 %add35.2 + %4 = load i16, i16* %arrayidx36.2, align 2 + %conv37.2 = sext i16 %4 to i32 + %shl38.2149 = add nsw i32 %conv37.2, %shl38.1148 + %add39.2 = shl i32 %shl38.2149, 14 + %add43 = add nsw i32 %add39.2, 32768 + %shr129 = lshr i32 %add43, 16 + %conv44 = trunc i32 %shr129 to i16 + %arrayidx45 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 %i.2138 + store i16 %conv44, i16* %arrayidx45, align 2 + %inc47 = add nuw nsw i32 %i.2138, 1 + %exitcond144 = icmp eq i32 %i.2138, 15 + br i1 %exitcond144, label %if.entry, label %for.body + +if.entry: ; preds = %for.body + %5 = load i16, i16* %In1, align 2 + %conv54 = sext i16 %5 to i32 + %mul55 = mul nsw i32 %conv54, 29491 + %shr56127 = lshr i32 %mul55, 15 + %arrayidx57 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 0 + %6 = load i16, i16* %arrayidx57, align 2 + %conv58 = sext i16 %6 to i32 + %mul59 = mul nsw i32 %conv58, 3277 + %shr60128 = lshr i32 %mul59, 15 + %add61 = add nuw nsw i32 %shr60128, %shr56127 + %conv62 = trunc i32 %add61 to i16 + store i16 %conv62, i16* %Output, align 2 + %arrayidx53.1 = getelementptr inbounds i16, i16* %In1, i32 1 + %7 = load i16, i16* %arrayidx53.1, align 2 + %conv54.1 = sext i16 %7 to i32 + %mul55.1 = mul nsw i32 %conv54.1, 29491 + %shr56127.1 = lshr i32 %mul55.1, 15 + %arrayidx57.1 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 1 + %8 = load i16, i16* %arrayidx57.1, align 2 + %conv58.1 = sext i16 %8 to i32 + 
%mul59.1 = mul nsw i32 %conv58.1, 3277 + %shr60128.1 = lshr i32 %mul59.1, 15 + %add61.1 = add nuw nsw i32 %shr60128.1, %shr56127.1 + %conv62.1 = trunc i32 %add61.1 to i16 + %arrayidx63.1 = getelementptr inbounds i16, i16* %Output, i32 1 + store i16 %conv62.1, i16* %arrayidx63.1, align 2 + %arrayidx53.2 = getelementptr inbounds i16, i16* %In1, i32 2 + %9 = load i16, i16* %arrayidx53.2, align 2 + %conv54.2 = sext i16 %9 to i32 + %mul55.2 = mul nsw i32 %conv54.2, 29491 + %shr56127.2 = lshr i32 %mul55.2, 15 + %arrayidx57.2 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 2 + %10 = load i16, i16* %arrayidx57.2, align 2 + %conv58.2 = sext i16 %10 to i32 + %mul59.2 = mul nsw i32 %conv58.2, 3277 + %shr60128.2 = lshr i32 %mul59.2, 15 + %add61.2 = add nuw nsw i32 %shr60128.2, %shr56127.2 + %conv62.2 = trunc i32 %add61.2 to i16 + %arrayidx63.2 = getelementptr inbounds i16, i16* %Output, i32 2 + store i16 %conv62.2, i16* %arrayidx63.2, align 2 + %arrayidx53.3 = getelementptr inbounds i16, i16* %In1, i32 3 + %11 = load i16, i16* %arrayidx53.3, align 2 + %conv54.3 = sext i16 %11 to i32 + %mul55.3 = mul nsw i32 %conv54.3, 29491 + %shr56127.3 = lshr i32 %mul55.3, 15 + %arrayidx57.3 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 3 + %12 = load i16, i16* %arrayidx57.3, align 2 + %conv58.3 = sext i16 %12 to i32 + %mul59.3 = mul nsw i32 %conv58.3, 3277 + %shr60128.3 = lshr i32 %mul59.3, 15 + %add61.3 = add nuw nsw i32 %shr60128.3, %shr56127.3 + %conv62.3 = trunc i32 %add61.3 to i16 + %arrayidx63.3 = getelementptr inbounds i16, i16* %Output, i32 3 + store i16 %conv62.3, i16* %arrayidx63.3, align 2 + %arrayidx53.4 = getelementptr inbounds i16, i16* %In1, i32 4 + %13 = load i16, i16* %arrayidx53.4, align 2 + %conv54.4 = sext i16 %13 to i32 + %mul55.4 = mul nsw i32 %conv54.4, 29491 + %shr56127.4 = lshr i32 %mul55.4, 15 + %arrayidx57.4 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 4 + %14 = load i16, i16* %arrayidx57.4, 
align 2 + %conv58.4 = sext i16 %14 to i32 + %mul59.4 = mul nsw i32 %conv58.4, 3277 + %shr60128.4 = lshr i32 %mul59.4, 15 + %add61.4 = add nuw nsw i32 %shr60128.4, %shr56127.4 + %conv62.4 = trunc i32 %add61.4 to i16 + %arrayidx63.4 = getelementptr inbounds i16, i16* %Output, i32 4 + store i16 %conv62.4, i16* %arrayidx63.4, align 2 + %arrayidx53.5 = getelementptr inbounds i16, i16* %In1, i32 5 + %15 = load i16, i16* %arrayidx53.5, align 2 + %conv54.5 = sext i16 %15 to i32 + %mul55.5 = mul nsw i32 %conv54.5, 29491 + %shr56127.5 = lshr i32 %mul55.5, 15 + %arrayidx57.5 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 5 + %16 = load i16, i16* %arrayidx57.5, align 2 + %conv58.5 = sext i16 %16 to i32 + %mul59.5 = mul nsw i32 %conv58.5, 3277 + %shr60128.5 = lshr i32 %mul59.5, 15 + %add61.5 = add nuw nsw i32 %shr60128.5, %shr56127.5 + %conv62.5 = trunc i32 %add61.5 to i16 + %arrayidx63.5 = getelementptr inbounds i16, i16* %Output, i32 5 + store i16 %conv62.5, i16* %arrayidx63.5, align 2 + %arrayidx53.6 = getelementptr inbounds i16, i16* %In1, i32 6 + %17 = load i16, i16* %arrayidx53.6, align 2 + %conv54.6 = sext i16 %17 to i32 + %mul55.6 = mul nsw i32 %conv54.6, 29491 + %shr56127.6 = lshr i32 %mul55.6, 15 + %arrayidx57.6 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 6 + %18 = load i16, i16* %arrayidx57.6, align 2 + %conv58.6 = sext i16 %18 to i32 + %mul59.6 = mul nsw i32 %conv58.6, 3277 + %shr60128.6 = lshr i32 %mul59.6, 15 + %add61.6 = add nuw nsw i32 %shr60128.6, %shr56127.6 + %conv62.6 = trunc i32 %add61.6 to i16 + %arrayidx63.6 = getelementptr inbounds i16, i16* %Output, i32 6 + store i16 %conv62.6, i16* %arrayidx63.6, align 2 + %arrayidx53.7 = getelementptr inbounds i16, i16* %In1, i32 7 + %19 = load i16, i16* %arrayidx53.7, align 2 + %conv54.7 = sext i16 %19 to i32 + %mul55.7 = mul nsw i32 %conv54.7, 29491 + %shr56127.7 = lshr i32 %mul55.7, 15 + %arrayidx57.7 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 
0, i32 7 + %20 = load i16, i16* %arrayidx57.7, align 2 + %conv58.7 = sext i16 %20 to i32 + %mul59.7 = mul nsw i32 %conv58.7, 3277 + %shr60128.7 = lshr i32 %mul59.7, 15 + %add61.7 = add nuw nsw i32 %shr60128.7, %shr56127.7 + %conv62.7 = trunc i32 %add61.7 to i16 + %arrayidx63.7 = getelementptr inbounds i16, i16* %Output, i32 7 + store i16 %conv62.7, i16* %arrayidx63.7, align 2 + %arrayidx53.8 = getelementptr inbounds i16, i16* %In1, i32 8 + %21 = load i16, i16* %arrayidx53.8, align 2 + %conv54.8 = sext i16 %21 to i32 + %mul55.8 = mul nsw i32 %conv54.8, 29491 + %shr56127.8 = lshr i32 %mul55.8, 15 + %arrayidx57.8 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 8 + %22 = load i16, i16* %arrayidx57.8, align 2 + %conv58.8 = sext i16 %22 to i32 + %mul59.8 = mul nsw i32 %conv58.8, 3277 + %shr60128.8 = lshr i32 %mul59.8, 15 + %add61.8 = add nuw nsw i32 %shr60128.8, %shr56127.8 + %conv62.8 = trunc i32 %add61.8 to i16 + %arrayidx63.8 = getelementptr inbounds i16, i16* %Output, i32 8 + store i16 %conv62.8, i16* %arrayidx63.8, align 2 + %arrayidx53.9 = getelementptr inbounds i16, i16* %In1, i32 9 + %23 = load i16, i16* %arrayidx53.9, align 2 + %conv54.9 = sext i16 %23 to i32 + %mul55.9 = mul nsw i32 %conv54.9, 29491 + %shr56127.9 = lshr i32 %mul55.9, 15 + %arrayidx57.9 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 9 + %24 = load i16, i16* %arrayidx57.9, align 2 + %conv58.9 = sext i16 %24 to i32 + %mul59.9 = mul nsw i32 %conv58.9, 3277 + %shr60128.9 = lshr i32 %mul59.9, 15 + %add61.9 = add nuw nsw i32 %shr60128.9, %shr56127.9 + %conv62.9 = trunc i32 %add61.9 to i16 + %arrayidx63.9 = getelementptr inbounds i16, i16* %Output, i32 9 + store i16 %conv62.9, i16* %arrayidx63.9, align 2 + %arrayidx53.10 = getelementptr inbounds i16, i16* %In1, i32 10 + %25 = load i16, i16* %arrayidx53.10, align 2 + %conv54.10 = sext i16 %25 to i32 + %mul55.10 = mul nsw i32 %conv54.10, 29491 + %shr56127.10 = lshr i32 %mul55.10, 15 + %arrayidx57.10 = 
getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 10 + %26 = load i16, i16* %arrayidx57.10, align 2 + %conv58.10 = sext i16 %26 to i32 + %mul59.10 = mul nsw i32 %conv58.10, 3277 + %shr60128.10 = lshr i32 %mul59.10, 15 + %add61.10 = add nuw nsw i32 %shr60128.10, %shr56127.10 + %conv62.10 = trunc i32 %add61.10 to i16 + %arrayidx63.10 = getelementptr inbounds i16, i16* %Output, i32 10 + store i16 %conv62.10, i16* %arrayidx63.10, align 2 + %arrayidx53.11 = getelementptr inbounds i16, i16* %In1, i32 11 + %27 = load i16, i16* %arrayidx53.11, align 2 + %conv54.11 = sext i16 %27 to i32 + %mul55.11 = mul nsw i32 %conv54.11, 29491 + %shr56127.11 = lshr i32 %mul55.11, 15 + %arrayidx57.11 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 11 + %28 = load i16, i16* %arrayidx57.11, align 2 + %conv58.11 = sext i16 %28 to i32 + %mul59.11 = mul nsw i32 %conv58.11, 3277 + %shr60128.11 = lshr i32 %mul59.11, 15 + %add61.11 = add nuw nsw i32 %shr60128.11, %shr56127.11 + %conv62.11 = trunc i32 %add61.11 to i16 + %arrayidx63.11 = getelementptr inbounds i16, i16* %Output, i32 11 + store i16 %conv62.11, i16* %arrayidx63.11, align 2 + %arrayidx53.12 = getelementptr inbounds i16, i16* %In1, i32 12 + %29 = load i16, i16* %arrayidx53.12, align 2 + %conv54.12 = sext i16 %29 to i32 + %mul55.12 = mul nsw i32 %conv54.12, 29491 + %shr56127.12 = lshr i32 %mul55.12, 15 + %arrayidx57.12 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 12 + %30 = load i16, i16* %arrayidx57.12, align 2 + %conv58.12 = sext i16 %30 to i32 + %mul59.12 = mul nsw i32 %conv58.12, 3277 + %shr60128.12 = lshr i32 %mul59.12, 15 + %add61.12 = add nuw nsw i32 %shr60128.12, %shr56127.12 + %conv62.12 = trunc i32 %add61.12 to i16 + %arrayidx63.12 = getelementptr inbounds i16, i16* %Output, i32 12 + store i16 %conv62.12, i16* %arrayidx63.12, align 2 + %arrayidx53.13 = getelementptr inbounds i16, i16* %In1, i32 13 + %31 = load i16, i16* %arrayidx53.13, align 2 + %conv54.13 = 
sext i16 %31 to i32 + %mul55.13 = mul nsw i32 %conv54.13, 29491 + %shr56127.13 = lshr i32 %mul55.13, 15 + %arrayidx57.13 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 13 + %32 = load i16, i16* %arrayidx57.13, align 2 + %conv58.13 = sext i16 %32 to i32 + %mul59.13 = mul nsw i32 %conv58.13, 3277 + %shr60128.13 = lshr i32 %mul59.13, 15 + %add61.13 = add nuw nsw i32 %shr60128.13, %shr56127.13 + %conv62.13 = trunc i32 %add61.13 to i16 + %arrayidx63.13 = getelementptr inbounds i16, i16* %Output, i32 13 + store i16 %conv62.13, i16* %arrayidx63.13, align 2 + %arrayidx53.14 = getelementptr inbounds i16, i16* %In1, i32 14 + %33 = load i16, i16* %arrayidx53.14, align 2 + %conv54.14 = sext i16 %33 to i32 + %mul55.14 = mul nsw i32 %conv54.14, 29491 + %shr56127.14 = lshr i32 %mul55.14, 15 + %arrayidx57.14 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 14 + %34 = load i16, i16* %arrayidx57.14, align 2 + %conv58.14 = sext i16 %34 to i32 + %mul59.14 = mul nsw i32 %conv58.14, 3277 + %shr60128.14 = lshr i32 %mul59.14, 15 + %add61.14 = add nuw nsw i32 %shr60128.14, %shr56127.14 + %conv62.14 = trunc i32 %add61.14 to i16 + %arrayidx63.14 = getelementptr inbounds i16, i16* %Output, i32 14 + store i16 %conv62.14, i16* %arrayidx63.14, align 2 + %arrayidx53.15 = getelementptr inbounds i16, i16* %In1, i32 15 + %35 = load i16, i16* %arrayidx53.15, align 2 + %conv54.15 = sext i16 %35 to i32 + %mul55.15 = mul nsw i32 %conv54.15, 29491 + %shr56127.15 = lshr i32 %mul55.15, 15 + %arrayidx57.15 = getelementptr inbounds [16 x i16], [16 x i16]* %Temp_Ref, i32 0, i32 15 + %36 = load i16, i16* %arrayidx57.15, align 2 + %conv58.15 = sext i16 %36 to i32 + %mul59.15 = mul nsw i32 %conv58.15, 3277 + %shr60128.15 = lshr i32 %mul59.15, 15 + %add61.15 = add nuw nsw i32 %shr60128.15, %shr56127.15 + %conv62.15 = trunc i32 %add61.15 to i16 + %arrayidx63.15 = getelementptr inbounds i16, i16* %Output, i32 15 + store i16 %conv62.15, i16* %arrayidx63.15, align 2 + 
store i16 %conv62.9, i16* %.reg2mem + store i16 %conv62.10, i16* %.reg2mem150 + store i16 %conv62.11, i16* %.reg2mem152 + store i16 %conv62.12, i16* %.reg2mem154 + store i16 %conv62.13, i16* %.reg2mem156 + store i16 %conv62.14, i16* %.reg2mem158 + %.reload159 = load i16, i16* %.reg2mem158 + %.reload157 = load i16, i16* %.reg2mem156 + %.reload155 = load i16, i16* %.reg2mem154 + %.reload153 = load i16, i16* %.reg2mem152 + %.reload151 = load i16, i16* %.reg2mem150 + %.reload = load i16, i16* %.reg2mem + %37 = load i16, i16* %Output, align 2 + %cmp77 = icmp slt i16 %37, 128 + br i1 %cmp77, label %A0, label %B0 + +A0: ; preds = %if.entry + store i16 128, i16* %Output, align 2 + br label %B0 + +B0: ; preds = %A, %if.entry + %38 = phi i16 [ 128, %A0 ], [ %37, %if.entry ] + %add84 = add i16 %38, 128 + %arrayidx74.1 = getelementptr inbounds i16, i16* %Output, i32 1 + %39 = load i16, i16* %arrayidx74.1, align 2 + %cmp77.1 = icmp slt i16 %39, %add84 + br i1 %cmp77.1, label %A1, label %B1 + +A1: ; preds = %B + store i16 %add84, i16* %arrayidx74.1, align 2 + br label %B1 + +B1: ; preds = %A1, %B + %40 = phi i16 [ %add84, %A1 ], [ %39, %B0 ] + %add84.1 = add i16 %40, 128 + %arrayidx74.2 = getelementptr inbounds i16, i16* %Output, i32 2 + %41 = load i16, i16* %arrayidx74.2, align 2 + %cmp77.2 = icmp slt i16 %41, %add84.1 + br i1 %cmp77.2, label %A2, label %B2 + +A2: ; preds = %B1 + store i16 %add84.1, i16* %arrayidx74.2, align 2 + br label %B2 + +B2: ; preds = %A2, %B1 + %42 = phi i16 [ %add84.1, %A2 ], [ %41, %B1 ] + %add84.2 = add i16 %42, 128 + %arrayidx74.3 = getelementptr inbounds i16, i16* %Output, i32 3 + %43 = load i16, i16* %arrayidx74.3, align 2 + %cmp77.3 = icmp slt i16 %43, %add84.2 + br i1 %cmp77.3, label %A3, label %B3 + +A3: ; preds = %B2 + store i16 %add84.2, i16* %arrayidx74.3, align 2 + br label %B3 + +B3: ; preds = %A3, %B2 + %44 = phi i16 [ %add84.2, %A3 ], [ %43, %B2 ] + %add84.3 = add i16 %44, 128 + %arrayidx74.4 = getelementptr inbounds i16, i16* %Output, 
i32 4 + %45 = load i16, i16* %arrayidx74.4, align 2 + %cmp77.4 = icmp slt i16 %45, %add84.3 + br i1 %cmp77.4, label %A4, label %B4 + +A4: ; preds = %B3 + store i16 %add84.3, i16* %arrayidx74.4, align 2 + br label %B4 + +B4: ; preds = %A4, %B3 + %46 = phi i16 [ %add84.3, %A4 ], [ %45, %B3 ] + %add84.4 = add i16 %46, 128 + %arrayidx74.5 = getelementptr inbounds i16, i16* %Output, i32 5 + %47 = load i16, i16* %arrayidx74.5, align 2 + %cmp77.5 = icmp slt i16 %47, %add84.4 + br i1 %cmp77.5, label %A5, label %B5 + +A5: ; preds = %B4 + store i16 %add84.4, i16* %arrayidx74.5, align 2 + br label %B5 + +B5: ; preds = %A5, %B4 + %48 = phi i16 [ %add84.4, %A5 ], [ %47, %B4 ] + %add84.5 = add i16 %46, 128 + %arrayidx74.6 = getelementptr inbounds i16, i16* %Output, i32 6 + %49 = load i16, i16* %arrayidx74.6, align 2 + %cmp77.6 = icmp slt i16 %49, %add84.5 + br i1 %cmp77.6, label %A6, label %B6 + +A6: ; preds = %B5 + store i16 %add84.5, i16* %arrayidx74.6, align 2 + br label %B6 + +B6: ; preds = %A6, %B5 + %50 = phi i16 [ %add84.5, %A6 ], [ %49, %B5 ] + %add84.6 = add i16 %46, 128 + %arrayidx74.7 = getelementptr inbounds i16, i16* %Output, i32 7 + %51 = load i16, i16* %arrayidx74.7, align 2 + %cmp77.7 = icmp slt i16 %51, %add84.6 + br i1 %cmp77.7, label %A7, label %B7 + +A7: ; preds = %B6 + store i16 %add84.6, i16* %arrayidx74.7, align 2 + br label %B7 + +B7: ; preds = %A7, %B6 + %52 = phi i16 [ %add84.6, %A7 ], [ %51, %B6 ] + %add84.7 = add i16 %46, 128 + %arrayidx74.8 = getelementptr inbounds i16, i16* %Output, i32 8 + %53 = load i16, i16* %arrayidx74.8, align 2 + %cmp77.8 = icmp slt i16 %53, %add84.7 + br i1 %cmp77.8, label %A8, label %B8 + +A8: ; preds = %B7 + store i16 %add84.7, i16* %arrayidx74.8, align 2 + br label %B8 + +B8: ; preds = %A8, %B7 + %54 = phi i16 [ %add84.7, %A8 ], [ %53, %B7 ] + %add84.8 = add i16 %46, 128 + %cmp77.9 = icmp slt i16 %.reload, %add84.8 + br i1 %cmp77.9, label %A9, label %B9 + +A9: ; preds = %B8 + %arrayidx74.9 = getelementptr inbounds i16, 
i16* %Output, i32 9 + store i16 %add84.8, i16* %arrayidx74.9, align 2 + br label %B9 + +B9: ; preds = %A9, %B8 + %55 = phi i16 [ %add84.8, %A9 ], [ %.reload, %B8 ] + %add84.9 = add i16 %46, 128 + %cmp77.10 = icmp slt i16 %.reload151, %add84.9 + br i1 %cmp77.10, label %A10, label %B10 + +A10: ; preds = %B9 + %arrayidx74.10 = getelementptr inbounds i16, i16* %Output, i32 10 + store i16 %add84.9, i16* %arrayidx74.10, align 2 + br label %B10 + +B10: ; preds = %A10, %B9 + %56 = phi i16 [ %add84.9, %A10 ], [ %.reload151, %B9 ] + %add84.10 = add i16 %46, 128 + %cmp77.11 = icmp slt i16 %.reload153, %add84.10 + br i1 %cmp77.11, label %A11, label %B11 + +A11: ; preds = %B10 + %arrayidx74.11 = getelementptr inbounds i16, i16* %Output, i32 11 + store i16 %add84.10, i16* %arrayidx74.11, align 2 + br label %B11 + +B11: ; preds = %A11, %B10 + %57 = phi i16 [ %add84.10, %A11 ], [ %.reload153, %B10 ] + %add84.11 = add i16 %46, 128 + %cmp77.12 = icmp slt i16 %.reload155, %add84.11 + br i1 %cmp77.12, label %A12, label %B13 + +A12: ; preds = %B11 + %arrayidx74.12 = getelementptr inbounds i16, i16* %Output, i32 12 + store i16 %add84.11, i16* %arrayidx74.12, align 2 + br label %B13 + +B13: ; preds = %A12, %B11 + ret void +} diff --git a/polly/test/ScopInfo/complex-successor-structure-3.ll b/polly/test/ScopInfo/complex-successor-structure-3.ll new file mode 100644 index 000000000000..9cfc1eb0a5c1 --- /dev/null +++ b/polly/test/ScopInfo/complex-successor-structure-3.ll @@ -0,0 +1,365 @@ +; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s +; +; Check that propagation of domains from A(X) to A(X+1) will keep the +; domains small and concise. 
+; +; CHECK: Assumed Context: +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { : } +; CHECK-NEXT: Invalid Context: +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { : 1 = 0 } +; +; CHECK: Stmt_FINAL +; CHECK-NEXT: Domain := +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { Stmt_FINAL[] }; +; CHECK-NEXT: Schedule := +; CHECK-NEXT: [tmp5, tmp, tmp8, tmp11, tmp14, tmp17, tmp20, tmp23, tmp26, p_9, p_10, p_11, p_12] -> { Stmt_FINAL[] -> [22] }; +; +; +; void f(short *restrict In, int *restrict Out) { +; int InV, V, Idx; +; Idx = 0; +; V = 999; +; +; A0: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B0: +; V = V + 42; +; Out[V]++; +; } else { +; C0: +; V = InV; +; Out[V]--; +; } +; +; A1: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B1: +; V = V + 42; +; Out[V]++; +; } else { +; C1: +; V = InV; +; Out[V]--; +; } +; V = 999; +; +; A2: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B2: +; V = V + 42; +; Out[V]++; +; } else { +; C2: +; V = InV; +; Out[V]--; +; } +; +; A3: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B3: +; V = V + 42; +; Out[V]++; +; } else { +; C3: +; V = InV; +; Out[V]--; +; } +; V = 999; +; +; A4: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B4: +; V = V + 42; +; Out[V]++; +; } else { +; C4: +; V = InV; +; Out[V]--; +; } +; +; A5: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B5: +; V = V + 42; +; Out[V]++; +; } else { +; C5: +; V = InV; +; Out[V]--; +; } +; V = 999; +; +; A6: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B6: +; V = V + 42; +; Out[V]++; +; } else { +; C6: +; V = InV; +; Out[V]--; +; } +; +; A7: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B7: +; V = V + 42; +; Out[V]++; +; } else { +; C7: +; V = InV; +; Out[V]--; +; } +; V = 999; +; +; A8: +; InV = In[Idx++]; +; if (InV < V + 42) { +; B8: +; V = V + 42; +; Out[V]++; +; } else { +; C8: +; V = InV; +; Out[V]--; +; } +; FINAL: +; 
Out[V]++; +; +; ScopExit: +; return; +; } +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i16* noalias %In, i32* noalias %Out) { +entry: + %tmp = load i16, i16* %In, align 2 + %conv = sext i16 %tmp to i32 + %cmp = icmp slt i16 %tmp, 1041 + br i1 %cmp, label %B0, label %C0 + +B0: ; preds = %entry + %arrayidx4 = getelementptr inbounds i32, i32* %Out, i64 1041 + %tmp3 = load i32, i32* %arrayidx4, align 4 + %inc5 = add nsw i32 %tmp3, 1 + store i32 %inc5, i32* %arrayidx4, align 4 + br label %A1 + +C0: ; preds = %entry + %idxprom6 = sext i16 %tmp to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %Out, i64 %idxprom6 + %tmp4 = load i32, i32* %arrayidx7, align 4 + %dec = add nsw i32 %tmp4, -1 + store i32 %dec, i32* %arrayidx7, align 4 + br label %A1 + +A1: ; preds = %B0, %C0 + %V.0 = phi i32 [ 1041, %B0 ], [ %conv, %C0 ] + %arrayidx10 = getelementptr inbounds i16, i16* %In, i64 1 + %tmp5 = load i16, i16* %arrayidx10, align 2 + %conv11 = sext i16 %tmp5 to i32 + %add12 = add nsw i32 %V.0, 42 + %cmp13 = icmp slt i32 %conv11, %add12 + br i1 %cmp13, label %B1, label %C1 + +B1: ; preds = %A1 + %add16 = add nsw i32 %V.0, 42 + %idxprom17 = sext i32 %add16 to i64 + %arrayidx18 = getelementptr inbounds i32, i32* %Out, i64 %idxprom17 + %tmp6 = load i32, i32* %arrayidx18, align 4 + %inc19 = add nsw i32 %tmp6, 1 + store i32 %inc19, i32* %arrayidx18, align 4 + br label %A2 + +C1: ; preds = %A1 + %idxprom21 = sext i16 %tmp5 to i64 + %arrayidx22 = getelementptr inbounds i32, i32* %Out, i64 %idxprom21 + %tmp7 = load i32, i32* %arrayidx22, align 4 + %dec23 = add nsw i32 %tmp7, -1 + store i32 %dec23, i32* %arrayidx22, align 4 + br label %A2 + +A2: ; preds = %B1, %C1 + %arrayidx27 = getelementptr inbounds i16, i16* %In, i64 2 + %tmp8 = load i16, i16* %arrayidx27, align 2 + %conv28 = sext i16 %tmp8 to i32 + %cmp30 = icmp slt i16 %tmp8, 1041 + br i1 %cmp30, label %B2, label %C2 + +B2: ; preds = %A2 + %arrayidx35 = getelementptr inbounds i32, i32* %Out, i64 
1041 + %tmp9 = load i32, i32* %arrayidx35, align 4 + %inc36 = add nsw i32 %tmp9, 1 + store i32 %inc36, i32* %arrayidx35, align 4 + br label %A3 + +C2: ; preds = %A2 + %idxprom38 = sext i16 %tmp8 to i64 + %arrayidx39 = getelementptr inbounds i32, i32* %Out, i64 %idxprom38 + %tmp10 = load i32, i32* %arrayidx39, align 4 + %dec40 = add nsw i32 %tmp10, -1 + store i32 %dec40, i32* %arrayidx39, align 4 + br label %A3 + +A3: ; preds = %B2, %C2 + %V.1 = phi i32 [ 1041, %B2 ], [ %conv28, %C2 ] + %arrayidx44 = getelementptr inbounds i16, i16* %In, i64 3 + %tmp11 = load i16, i16* %arrayidx44, align 2 + %conv45 = sext i16 %tmp11 to i32 + %add46 = add nsw i32 %V.1, 42 + %cmp47 = icmp slt i32 %conv45, %add46 + br i1 %cmp47, label %B3, label %C3 + +B3: ; preds = %A3 + %add50 = add nsw i32 %V.1, 42 + %idxprom51 = sext i32 %add50 to i64 + %arrayidx52 = getelementptr inbounds i32, i32* %Out, i64 %idxprom51 + %tmp12 = load i32, i32* %arrayidx52, align 4 + %inc53 = add nsw i32 %tmp12, 1 + store i32 %inc53, i32* %arrayidx52, align 4 + br label %A4 + +C3: ; preds = %A3 + %idxprom55 = sext i16 %tmp11 to i64 + %arrayidx56 = getelementptr inbounds i32, i32* %Out, i64 %idxprom55 + %tmp13 = load i32, i32* %arrayidx56, align 4 + %dec57 = add nsw i32 %tmp13, -1 + store i32 %dec57, i32* %arrayidx56, align 4 + br label %A4 + +A4: ; preds = %B3, %C3 + %arrayidx61 = getelementptr inbounds i16, i16* %In, i64 4 + %tmp14 = load i16, i16* %arrayidx61, align 2 + %conv62 = sext i16 %tmp14 to i32 + %cmp64 = icmp slt i16 %tmp14, 1041 + br i1 %cmp64, label %B4, label %C4 + +B4: ; preds = %A4 + %arrayidx69 = getelementptr inbounds i32, i32* %Out, i64 1041 + %tmp15 = load i32, i32* %arrayidx69, align 4 + %inc70 = add nsw i32 %tmp15, 1 + store i32 %inc70, i32* %arrayidx69, align 4 + br label %A5 + +C4: ; preds = %A4 + %idxprom72 = sext i16 %tmp14 to i64 + %arrayidx73 = getelementptr inbounds i32, i32* %Out, i64 %idxprom72 + %tmp16 = load i32, i32* %arrayidx73, align 4 + %dec74 = add nsw i32 %tmp16, -1 + store 
i32 %dec74, i32* %arrayidx73, align 4 + %phitmp = add nsw i32 %conv62, 42 + br label %A5 + +A5: ; preds = %B4, %C4 + %V.2 = phi i32 [ 1083, %B4 ], [ %phitmp, %C4 ] + %arrayidx78 = getelementptr inbounds i16, i16* %In, i64 5 + %tmp17 = load i16, i16* %arrayidx78, align 2 + %conv79 = sext i16 %tmp17 to i32 + %cmp81 = icmp slt i32 %conv79, %V.2 + br i1 %cmp81, label %B5, label %C5 + +B5: ; preds = %A5 + %idxprom85 = sext i32 %V.2 to i64 + %arrayidx86 = getelementptr inbounds i32, i32* %Out, i64 %idxprom85 + %tmp18 = load i32, i32* %arrayidx86, align 4 + %inc87 = add nsw i32 %tmp18, 1 + store i32 %inc87, i32* %arrayidx86, align 4 + br label %A6 + +C5: ; preds = %A5 + %idxprom89 = sext i16 %tmp17 to i64 + %arrayidx90 = getelementptr inbounds i32, i32* %Out, i64 %idxprom89 + %tmp19 = load i32, i32* %arrayidx90, align 4 + %dec91 = add nsw i32 %tmp19, -1 + store i32 %dec91, i32* %arrayidx90, align 4 + br label %A6 + +A6: ; preds = %B5, %C5 + %arrayidx95 = getelementptr inbounds i16, i16* %In, i64 6 + %tmp20 = load i16, i16* %arrayidx95, align 2 + %conv96 = sext i16 %tmp20 to i32 + %cmp98 = icmp slt i16 %tmp20, 1041 + br i1 %cmp98, label %B6, label %C6 + +B6: ; preds = %A6 + %arrayidx103 = getelementptr inbounds i32, i32* %Out, i64 1041 + %tmp21 = load i32, i32* %arrayidx103, align 4 + %inc104 = add nsw i32 %tmp21, 1 + store i32 %inc104, i32* %arrayidx103, align 4 + br label %A7 + +C6: ; preds = %A6 + %idxprom106 = sext i16 %tmp20 to i64 + %arrayidx107 = getelementptr inbounds i32, i32* %Out, i64 %idxprom106 + %tmp22 = load i32, i32* %arrayidx107, align 4 + %dec108 = add nsw i32 %tmp22, -1 + store i32 %dec108, i32* %arrayidx107, align 4 + %phitmp1 = add nsw i32 %conv96, 42 + br label %A7 + +A7: ; preds = %B6, %C6 + %V.3 = phi i32 [ 1083, %B6 ], [ %phitmp1, %C6 ] + %arrayidx112 = getelementptr inbounds i16, i16* %In, i64 7 + %tmp23 = load i16, i16* %arrayidx112, align 2 + %conv113 = sext i16 %tmp23 to i32 + %cmp115 = icmp slt i32 %conv113, %V.3 + br i1 %cmp115, label %B7, 
label %C7 + +B7: ; preds = %A7 + %idxprom119 = sext i32 %V.3 to i64 + %arrayidx120 = getelementptr inbounds i32, i32* %Out, i64 %idxprom119 + %tmp24 = load i32, i32* %arrayidx120, align 4 + %inc121 = add nsw i32 %tmp24, 1 + store i32 %inc121, i32* %arrayidx120, align 4 + br label %A8 + +C7: ; preds = %A7 + %idxprom123 = sext i16 %tmp23 to i64 + %arrayidx124 = getelementptr inbounds i32, i32* %Out, i64 %idxprom123 + %tmp25 = load i32, i32* %arrayidx124, align 4 + %dec125 = add nsw i32 %tmp25, -1 + store i32 %dec125, i32* %arrayidx124, align 4 + br label %A8 + +A8: ; preds = %B7, %C7 + %arrayidx129 = getelementptr inbounds i16, i16* %In, i64 8 + %tmp26 = load i16, i16* %arrayidx129, align 2 + %cmp132 = icmp slt i16 %tmp26, 1041 + br i1 %cmp132, label %B8, label %C8 + +B8: ; preds = %A8 + %arrayidx137 = getelementptr inbounds i32, i32* %Out, i64 1041 + %tmp27 = load i32, i32* %arrayidx137, align 4 + %inc138 = add nsw i32 %tmp27, 1 + store i32 %inc138, i32* %arrayidx137, align 4 + br label %FINAL + +C8: ; preds = %A8 + %idxprom140 = sext i16 %tmp26 to i64 + %arrayidx141 = getelementptr inbounds i32, i32* %Out, i64 %idxprom140 + %tmp28 = load i32, i32* %arrayidx141, align 4 + %dec142 = add nsw i32 %tmp28, -1 + store i32 %dec142, i32* %arrayidx141, align 4 + %phitmp2 = sext i16 %tmp26 to i64 + br label %FINAL + +FINAL: ; preds = %C8, %B8 + %V.4 = phi i64 [ 1041, %B8 ], [ %phitmp2, %C8 ] + %arrayidx145 = getelementptr inbounds i32, i32* %Out, i64 %V.4 + %tmp29 = load i32, i32* %arrayidx145, align 4 + %inc146 = add nsw i32 %tmp29, 1 + store i32 %inc146, i32* %arrayidx145, align 4 + br label %ScopExit + +ScopExit: + ret void +} diff --git a/polly/test/ScopInfo/complex-successor-structure.ll b/polly/test/ScopInfo/complex-successor-structure.ll index 83871cf72cdb..1be569487e80 100644 --- a/polly/test/ScopInfo/complex-successor-structure.ll +++ b/polly/test/ScopInfo/complex-successor-structure.ll @@ -1,9 +1,12 @@ ; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" 
-polly-scops \ ; RUN: < %s 2>&1 | FileCheck %s -; We build scops from a region of for.body->B13 having successor nodes -; of following form and check that the domain construction does not take a huge -; amount of time. +; We build a scop from the region for.body->B13. The CFG is of the +; following form. The test checks that the condition construction does not take +; a huge amount of time. While we can propagate the domain constraints from +; B(X) to B(X+1) the conditions in B(X+1) will exponentially grow the number +; of needed constraints (it is basically the condition of B(X) + one smax), +; thus we should bail out at some point. ; ; CHECK: Low complexity assumption: { : 1 = 0 } diff --git a/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll b/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll index 7db4f5f2a817..95b159fb6959 100644 --- a/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll +++ b/polly/test/ScopInfo/intra_and_inter_bb_scalar_dep.ll @@ -15,7 +15,7 @@ ; CHECK: Invariant Accesses: { ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] ; CHECK-NEXT: [N] -> { Stmt_for_j[i0, i1] -> MemRef_init_ptr[0] }; -; CHECK-NEXT: Execution Context: [N] -> { : N < 0 or N > 0 } +; CHECK-NEXT: Execution Context: [N] -> { : N > 0 } ; CHECK-NEXT: } ; ; CHECK: Statements { diff --git a/polly/test/ScopInfo/non-affine-region-with-loop-2.ll b/polly/test/ScopInfo/non-affine-region-with-loop-2.ll index 8ab0458e9202..555c6e120076 100644 --- a/polly/test/ScopInfo/non-affine-region-with-loop-2.ll +++ b/polly/test/ScopInfo/non-affine-region-with-loop-2.ll @@ -8,7 +8,7 @@ ; CHECK: [indvar] -> { Stmt_loop3[i0] -> [0, 0] : indvar >= 101 or indvar <= 99 }; ; CHECK: Stmt_loop2__TO__loop ; CHECK: Domain := -; CHECK: [indvar] -> { Stmt_loop2__TO__loop[] : indvar <= 99 or indvar >= 101 }; +; CHECK: [indvar] -> { Stmt_loop2__TO__loop[] : indvar >= 101 or indvar <= 99 }; ; CHECK: Schedule := ; CHECK: [indvar] -> { Stmt_loop2__TO__loop[] -> [1, 0] : indvar >= 101 or indvar 
<= 99 }; ; diff --git a/polly/test/ScopInfo/non_affine_region_1.ll b/polly/test/ScopInfo/non_affine_region_1.ll index 7f5fb4a24e74..a6e89545f75a 100644 --- a/polly/test/ScopInfo/non_affine_region_1.ll +++ b/polly/test/ScopInfo/non_affine_region_1.ll @@ -18,9 +18,6 @@ ; } ; } ; -; TODO: We build a complicated representation of Stmt_bb10__TO__bb18's domain that will also complicate the schedule. -; Once the domain is simple this test should fail and this TODO can be removed. - ; CHECK: Statements { ; CHECK-NEXT: Stmt_bb3 ; CHECK-NEXT: Domain := @@ -38,16 +35,16 @@ ; CHECK-NEXT: [b] -> { Stmt_bb7[i0] -> MemRef_x_1__phi[] }; ; CHECK-NEXT: Stmt_bb8 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [b] -> { Stmt_bb8[0] : b = 0 }; +; CHECK-NEXT: [b] -> { Stmt_bb8[i0] : i0 >= b and 0 <= i0 <= 1023 and 2i0 <= b }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [b] -> { Stmt_bb8[i0] -> [0, 0] }; ; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: [b] -> { Stmt_bb8[i0] -> MemRef_x_1__phi[] }; ; CHECK-NEXT: Stmt_bb10__TO__bb18 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [b] -> { Stmt_bb10__TO__bb18[i0] : 0 <= i0 <= 1023 and (i0 < b or (i0 >= b and 2i0 > b)); Stmt_bb10__TO__bb18[0] : b = 0 }; +; CHECK-NEXT: [b] -> { Stmt_bb10__TO__bb18[i0] : 0 <= i0 <= 1023 }; ; CHECK-NEXT: Schedule := -; CHECK-NEXT: [b] -> { Stmt_bb10__TO__bb18[i0] -> [i0, 3] : i0 < b or (i0 >= b and 2i0 > b); Stmt_bb10__TO__bb18[0] -> [0, 3] : b = 0 }; +; CHECK-NEXT: [b] -> { Stmt_bb10__TO__bb18[i0] -> [i0, 3] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] ; CHECK-NEXT: [b] -> { Stmt_bb10__TO__bb18[i0] -> MemRef_x_1__phi[] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] diff --git a/polly/test/ScopInfo/pointer-used-as-base-pointer-and-scalar-read.ll b/polly/test/ScopInfo/pointer-used-as-base-pointer-and-scalar-read.ll index 11b0e3318146..c34b3f1f01e0 100644 --- a/polly/test/ScopInfo/pointer-used-as-base-pointer-and-scalar-read.ll +++ 
b/polly/test/ScopInfo/pointer-used-as-base-pointer-and-scalar-read.ll @@ -37,7 +37,7 @@ ; CHECK-NEXT: [p] -> { Stmt_else[i0] -> MemRef_x__phi[] }; ; CHECK-NEXT: Stmt_bb8 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [p] -> { Stmt_bb8[i0] : 0 <= i0 <= 999 and (p >= 33 or p <= 32) }; +; CHECK-NEXT: [p] -> { Stmt_bb8[i0] : 0 <= i0 <= 999 }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [p] -> { Stmt_bb8[i0] -> [i0, 2] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1] diff --git a/polly/test/ScopInfo/remarks.ll b/polly/test/ScopInfo/remarks.ll index e35d38d44464..c10fe001acaf 100644 --- a/polly/test/ScopInfo/remarks.ll +++ b/polly/test/ScopInfo/remarks.ll @@ -1,10 +1,10 @@ ; RUN: opt %loadPolly -pass-remarks-analysis="polly-scops" -polly-scops -disable-output < %s 2>&1 | FileCheck %s ; ; CHECK: remark: test/ScopInfo/remarks.c:4:7: SCoP begins here. -; CHECK: remark: test/ScopInfo/remarks.c:8:5: Finite loop restriction: [M, N, Debug] -> { : N > 0 and (M <= -2 or M = -1) } -; CHECK: remark: test/ScopInfo/remarks.c:13:7: No-error restriction: [M, N, Debug] -> { : M >= 0 and N > 0 and (Debug < 0 or Debug > 0) } -; CHECK: remark: test/ScopInfo/remarks.c:9:7: Inbounds assumption: [M, N, Debug] -> { : M <= 100 or (M > 0 and N <= 0) } -; CHECK: remark: :0:0: No-overflows restriction: [N, M, Debug] -> { : M <= -2147483649 - N or M >= 2147483648 - N } +; CHECK: remark: test/ScopInfo/remarks.c:8:5: Finite loop restriction: [N, M] -> { : N > 0 and (M <= -2 or M = -1) } +; CHECK: remark: test/ScopInfo/remarks.c:13:7: No-error restriction: [N, M, Debug] -> { : N > 0 and M >= 0 and (Debug < 0 or Debug > 0) } +; CHECK: remark: test/ScopInfo/remarks.c:9:7: Inbounds assumption: [N, M] -> { : N <= 0 or (N > 0 and M <= 100) } +; CHECK: remark: :0:0: No-overflows restriction: [N, M, Debug] -> { : M <= -2147483649 - N or M >= 2147483648 - N } ; CHECK: remark: test/ScopInfo/remarks.c:9:18: Possibly aliasing pointer, use restrict keyword. 
; CHECK: remark: test/ScopInfo/remarks.c:9:33: Possibly aliasing pointer, use restrict keyword. ; CHECK: remark: test/ScopInfo/remarks.c:9:15: Possibly aliasing pointer, use restrict keyword. diff --git a/polly/test/ScopInfo/switch-1.ll b/polly/test/ScopInfo/switch-1.ll index 7d0d58e2b069..354f73524679 100644 --- a/polly/test/ScopInfo/switch-1.ll +++ b/polly/test/ScopInfo/switch-1.ll @@ -21,7 +21,7 @@ ; CHECK: Statements { ; CHECK-NEXT: Stmt_sw_bb_1 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [N] -> { Stmt_sw_bb_1[i0] : 4*floor((-1 + i0)/4) = -1 + i0 and 0 < i0 < N }; +; CHECK-NEXT: [N] -> { Stmt_sw_bb_1[i0] : 4*floor((-1 + i0)/4) = -1 + i0 and 0 <= i0 < N }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [N] -> { Stmt_sw_bb_1[i0] -> [i0, 2] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] @@ -30,7 +30,7 @@ ; CHECK-NEXT: [N] -> { Stmt_sw_bb_1[i0] -> MemRef_A[i0] }; ; CHECK-NEXT: Stmt_sw_bb_2 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [N] -> { Stmt_sw_bb_2[i0] : 4*floor((-2 + i0)/4) = -2 + i0 and 2 <= i0 < N }; +; CHECK-NEXT: [N] -> { Stmt_sw_bb_2[i0] : 4*floor((-2 + i0)/4) = -2 + i0 and 0 <= i0 < N }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [N] -> { Stmt_sw_bb_2[i0] -> [i0, 1] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] @@ -50,19 +50,14 @@ ; AST: if (1) ; -; AST: { -; AST-NEXT: for (int c0 = 1; c0 < N - 2; c0 += 4) { -; AST-NEXT: Stmt_sw_bb_1(c0); -; AST-NEXT: Stmt_sw_bb_2(c0 + 1); -; AST-NEXT: Stmt_sw_bb_6(c0 + 2); -; AST-NEXT: } -; AST-NEXT: if (N >= 2) -; AST-NEXT: if (N % 4 >= 2) { -; AST-NEXT: Stmt_sw_bb_1(-(N % 4) + N + 1); -; AST-NEXT: if ((N - 3) % 4 == 0) -; AST-NEXT: Stmt_sw_bb_2(N - 1); -; AST-NEXT: } -; AST-NEXT: } +; AST: for (int c0 = 1; c0 < N; c0 += 4) { +; AST-NEXT: Stmt_sw_bb_1(c0); +; AST-NEXT: if (N >= c0 + 2) { +; AST-NEXT: Stmt_sw_bb_2(c0 + 1); +; AST-NEXT: if (N >= c0 + 3) +; AST-NEXT: Stmt_sw_bb_6(c0 + 2); +; AST-NEXT: } +; AST-NEXT: } ; ; AST: else ; AST-NEXT: { /* original code */ } diff --git 
a/polly/test/ScopInfo/switch-2.ll b/polly/test/ScopInfo/switch-2.ll index 062fe89688f0..48c1b01dee77 100644 --- a/polly/test/ScopInfo/switch-2.ll +++ b/polly/test/ScopInfo/switch-2.ll @@ -29,7 +29,7 @@ ; CHECK-NEXT: [N] -> { Stmt_sw_bb[i0] -> MemRef_A[i0] }; ; CHECK-NEXT: Stmt_sw_bb_2 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [N] -> { Stmt_sw_bb_2[i0] : 4*floor((-2 + i0)/4) = -2 + i0 and 2 <= i0 < N }; +; CHECK-NEXT: [N] -> { Stmt_sw_bb_2[i0] : 4*floor((-2 + i0)/4) = -2 + i0 and 0 <= i0 < N }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [N] -> { Stmt_sw_bb_2[i0] -> [i0, 0] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] @@ -40,11 +40,14 @@ ; AST: if (1) ; -; AST: for (int c0 = 0; c0 < N; c0 += 4) { -; AST-NEXT: Stmt_sw_bb(c0); -; AST-NEXT: if (N >= c0 + 3) -; AST-NEXT: Stmt_sw_bb_2(c0 + 2); -; AST-NEXT: } +; AST: { +; AST-NEXT: for (int c0 = 0; c0 < N - 2; c0 += 4) { +; AST-NEXT: Stmt_sw_bb(c0); +; AST-NEXT: Stmt_sw_bb_2(c0 + 2); +; AST-NEXT: } +; AST-NEXT: if (N >= 1 && (N + 1) % 4 >= 2) +; AST-NEXT: Stmt_sw_bb(-((N + 1) % 4) + N + 1); +; AST-NEXT: } ; ; AST: else ; AST-NEXT: { /* original code */ } diff --git a/polly/test/ScopInfo/switch-4.ll b/polly/test/ScopInfo/switch-4.ll index a111f9480a63..1323ccc0dd22 100644 --- a/polly/test/ScopInfo/switch-4.ll +++ b/polly/test/ScopInfo/switch-4.ll @@ -33,7 +33,7 @@ ; CHECK-NEXT: [N] -> { Stmt_sw_bb[i0] -> MemRef_A[i0] }; ; CHECK-NEXT: Stmt_sw_bb_1 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [N] -> { Stmt_sw_bb_1[i0] : 4*floor((-1 + i0)/4) = -1 + i0 and 0 < i0 < N }; +; CHECK-NEXT: [N] -> { Stmt_sw_bb_1[i0] : 4*floor((-1 + i0)/4) = -1 + i0 and 0 <= i0 < N }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [N] -> { Stmt_sw_bb_1[i0] -> [i0, 2] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] @@ -42,7 +42,7 @@ ; CHECK-NEXT: [N] -> { Stmt_sw_bb_1[i0] -> MemRef_A[i0] }; ; CHECK-NEXT: Stmt_sw_bb_5 ; CHECK-NEXT: Domain := -; CHECK-NEXT: [N] -> { Stmt_sw_bb_5[i0] : 4*floor((-2 + i0)/4) = -2 + i0 and 2 <= i0 < N }; +; 
CHECK-NEXT: [N] -> { Stmt_sw_bb_5[i0] : 4*floor((-2 + i0)/4) = -2 + i0 and 0 <= i0 < N }; ; CHECK-NEXT: Schedule := ; CHECK-NEXT: [N] -> { Stmt_sw_bb_5[i0] -> [i0, 1] }; ; CHECK-NEXT: ReadAccess := [Reduction Type: +] [Scalar: 0] @@ -62,23 +62,17 @@ ; AST: if (1) ; -; AST: { -; AST-NEXT: for (int c0 = 0; c0 < N - 3; c0 += 4) { -; AST-NEXT: Stmt_sw_bb(c0); -; AST-NEXT: Stmt_sw_bb_1(c0 + 1); -; AST-NEXT: Stmt_sw_bb_5(c0 + 2); -; AST-NEXT: Stmt_sw_bb_9(c0 + 3); -; AST-NEXT: } -; AST-NEXT: if (N >= 1) -; AST-NEXT: if (N % 4 >= 1) { -; AST-NEXT: Stmt_sw_bb(-(N % 4) + N); -; AST-NEXT: if (N % 4 >= 2) { -; AST-NEXT: Stmt_sw_bb_1(-(N % 4) + N + 1); -; AST-NEXT: if ((N - 3) % 4 == 0) -; AST-NEXT: Stmt_sw_bb_5(N - 1); -; AST-NEXT: } -; AST-NEXT: } -; AST-NEXT: } +; AST: for (int c0 = 0; c0 < N; c0 += 4) { +; AST-NEXT: Stmt_sw_bb(c0); +; AST-NEXT: if (N >= c0 + 2) { +; AST-NEXT: Stmt_sw_bb_1(c0 + 1); +; AST-NEXT: if (N >= c0 + 3) { +; AST-NEXT: Stmt_sw_bb_5(c0 + 2); +; AST-NEXT: if (N >= c0 + 4) +; AST-NEXT: Stmt_sw_bb_9(c0 + 3); +; AST-NEXT: } +; AST-NEXT: } +; AST-NEXT: } ; ; AST: else ; AST-NEXT: { /* original code */ }