Skip to content

Commit

Permalink
[IndVarSimplify] Add TapirIndVarSimplify pass, a version of the IndVarSimplify pass that applies only to Tapir loops.
Browse files Browse the repository at this point in the history
Run TapirIndVarSimplify before loop stripmining to help ensure that Tapir loops can be stripmined after other optimizations, such as loop peeling. Addresses issue llvm#88.
  • Loading branch information
neboat committed Jul 1, 2023
1 parent 6bfac8e commit 5ec3d08
Show file tree
Hide file tree
Showing 10 changed files with 169 additions and 14 deletions.
13 changes: 8 additions & 5 deletions clang/test/CodeGen/thinlto-distributed-newpm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,19 @@
; CHECK-O: Running pass: RecomputeGlobalsAAPass
; CHECK-O: Running pass: Float2IntPass on main
; CHECK-O: Running pass: LowerConstantIntrinsicsPass on main
; CHECK-O: Running pass: LoopStripMinePass on main
; CHECK-O: Running analysis: ScalarEvolutionAnalysis on main
; CHECK-O: Running analysis: LoopAnalysis on main
; CHECK-O: Running analysis: TaskAnalysis on main
; CHECK-O: Running pass: TaskSimplifyPass on main
; CHECK-O: Running pass: LoopSimplifyPass on main
; CHECK-O: Running analysis: LoopAnalysis on main
; CHECK-O: Running pass: LCSSAPass on main
; CHECK-O: Running analysis: AAManager on main
; CHECK-O: Running analysis: BasicAA on main
; CHECK-O: Running analysis: TaskAnalysis on main
; CHECK-O: Running analysis: ScalarEvolutionAnalysis on main
; CHECK-O: Running analysis: InnerAnalysisManagerProxy
; CHECK-O: Running pass: TapirIndVarSimplifyPass
; CHECK-O: Running pass: LoopStripMinePass on main
; CHECK-O: Running pass: TaskSimplifyPass on main
; CHECK-O: Running pass: LoopSimplifyPass on main
; CHECK-O: Running pass: LCSSAPass on main
; CHECK-O: Running pass: LoopSimplifyCFGPass on Loop at depth 1 containing: %b
; CHECK-O: Running pass: LICMPass on Loop at depth 1 containing: %b
; CHECK-O: Running pass: EarlyCSEPass on main
Expand Down
11 changes: 11 additions & 0 deletions llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,17 @@ class IndVarSimplifyPass : public PassInfoMixin<IndVarSimplifyPass> {
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};

/// Loop pass for the new pass manager that runs induction-variable
/// simplification restricted to Tapir loops. It mirrors the interface of
/// IndVarSimplifyPass declared above (same constructor flag, same run()
/// signature).
class TapirIndVarSimplifyPass : public PassInfoMixin<TapirIndVarSimplifyPass> {
/// Perform IV widening during the pass.
bool WidenIndVars;

public:
/// \param WidenIndVars whether this pass instance requests IV widening;
/// defaults to true.
TapirIndVarSimplifyPass(bool WidenIndVars = true)
: WidenIndVars(WidenIndVars) {}
/// Run the pass on loop \p L using the standard loop analyses in \p AR.
/// \returns the set of analyses preserved by the run.
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};

} // end namespace llvm

#endif // LLVM_TRANSFORMS_SCALAR_INDVARSIMPLIFY_H
15 changes: 10 additions & 5 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1333,16 +1333,21 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// Stripmine Tapir loops, if pass is enabled.
if (PTO.LoopStripmine && Level != OptimizationLevel::O1 &&
!Level.isOptimizingForSize()) {
LoopPassManager LPM1, LPM2;
LPM1.addPass(TapirIndVarSimplifyPass());
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(std::move(LPM1),
/*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
OptimizePM.addPass(LoopStripMinePass());
// Cleanup tasks after stripmining loops.
OptimizePM.addPass(TaskSimplifyPass());
// Cleanup after stripmining loops.
LoopPassManager LPM;
LPM.addPass(LoopSimplifyCFGPass());
LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
LPM2.addPass(LoopSimplifyCFGPass());
LPM2.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(std::move(LPM),
createFunctionToLoopPassAdaptor(std::move(LPM2),
/*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
// Don't run IndVarSimplify at this point, as it can actually inhibit
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,7 @@ LOOP_PASS("guard-widening", GuardWideningPass())
LOOP_PASS("loop-bound-split", LoopBoundSplitPass())
LOOP_PASS("loop-reroll", LoopRerollPass())
LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass())
LOOP_PASS("tapir-indvars", TapirIndVarSimplifyPass())
#undef LOOP_PASS

#ifndef LOOP_PASS_WITH_PARAMS
Expand Down
32 changes: 28 additions & 4 deletions llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ class IndVarSimplify {

SmallVector<WeakTrackingVH, 16> DeadInsts;
bool WidenIndVars;
bool TapirLoopsOnly;

bool handleFloatingPointIV(Loop *L, PHINode *PH);
bool rewriteNonIntegerIVs(Loop *L);
Expand Down Expand Up @@ -178,9 +179,9 @@ class IndVarSimplify {
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
const DataLayout &DL, TargetLibraryInfo *TLI,
TargetTransformInfo *TTI, MemorySSA *MSSA, TaskInfo *TI,
bool WidenIndVars)
bool WidenIndVars, bool TapirLoopsOnly)
: LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), TI(TI),
WidenIndVars(WidenIndVars) {
WidenIndVars(WidenIndVars), TapirLoopsOnly(TapirLoopsOnly) {
if (MSSA)
MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
}
Expand Down Expand Up @@ -2096,6 +2097,8 @@ bool IndVarSimplify::run(Loop *L) {
return false;

bool IsTapirLoop = (nullptr != getTaskIfTapirLoop(L, TI));
if (TapirLoopsOnly && !IsTapirLoop)
return false;
#ifndef NDEBUG
// Used below for a consistency check only
// Note: Since the result returned by ScalarEvolution may depend on the order
Expand Down Expand Up @@ -2294,7 +2297,27 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
const DataLayout &DL = F->getParent()->getDataLayout();

IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
&AR.TI, WidenIndVars && AllowIVWidening);
&AR.TI, WidenIndVars && AllowIVWidening,
/*TapirLoopsOnly=*/false);
if (!IVS.run(&L))
return PreservedAnalyses::all();

auto PA = getLoopPassPreservedAnalyses();
PA.preserveSet<CFGAnalyses>();
if (AR.MSSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
}

PreservedAnalyses TapirIndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
Function *F = L.getHeader()->getParent();
const DataLayout &DL = F->getParent()->getDataLayout();

IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
&AR.TI, WidenIndVars && AllowIVWidening,
/*TapirLoopsOnly=*/true);
if (!IVS.run(&L))
return PreservedAnalyses::all();

Expand Down Expand Up @@ -2332,7 +2355,8 @@ struct IndVarSimplifyLegacyPass : public LoopPass {
if (MSSAAnalysis)
MSSA = &MSSAAnalysis->getMSSA();

IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, TI, AllowIVWidening);
IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, TI, AllowIVWidening,
/*TapirLoopsOnly=*/false);
return IVS.run(L);
}

Expand Down
6 changes: 6 additions & 0 deletions llvm/test/Other/new-pm-defaults.ll
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,9 @@
; CHECK-O-NEXT: Running pass: LowerConstantIntrinsicsPass on foo
; CHECK-MATRIX: Running pass: LowerMatrixIntrinsicsPass on f
; CHECK-MATRIX-NEXT: Running pass: EarlyCSEPass on f
; CHECK-O2-NEXT: Running pass: LoopSimplifyPass
; CHECK-O2-NEXT: Running pass: LCSSAPass
; CHECK-O2-NEXT: Running pass: TapirIndVarSimplifyPass
; CHECK-O2-NEXT: Running pass: LoopStripMinePass
; CHECK-O2-NEXT: Running pass: TaskSimplifyPass on foo
; CHECK-O2-NEXT: Running pass: LoopSimplifyPass
Expand All @@ -249,6 +252,9 @@
; CHECK-O2-NEXT: Running pass: CorrelatedValuePropagationPass
; CHECK-O2-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O2-NEXT: Running pass: InstCombinePass
; CHECK-O3-NEXT: Running pass: LoopSimplifyPass
; CHECK-O3-NEXT: Running pass: LCSSAPass
; CHECK-O3-NEXT: Running pass: TapirIndVarSimplifyPass
; CHECK-O3-NEXT: Running pass: LoopStripMinePass
; CHECK-O3-NEXT: Running pass: TaskSimplifyPass on foo
; CHECK-O3-NEXT: Running pass: LoopSimplifyPass
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/Other/new-pm-thinlto-defaults.ll
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,9 @@
; CHECK-POST-EP-OPT-EARLY-NEXT: Running pass: NoOpModulePass
; CHECK-POSTLINK-O-NEXT: Running pass: Float2IntPass
; CHECK-POSTLINK-O-NEXT: Running pass: LowerConstantIntrinsicsPass
; CHECK-POSTLINK-O2-NEXT: Running pass: LoopSimplifyPass
; CHECK-POSTLINK-O2-NEXT: Running pass: LCSSAPass
; CHECK-POSTLINK-O2-NEXT: Running pass: TapirIndVarSimplifyPass
; CHECK-POSTLINK-O2-NEXT: Running pass: LoopStripMinePass on foo
; CHECK-POSTLINK-O2-NEXT: Running pass: TaskSimplifyPass on foo
; CHECK-POSTLINK-O2-NEXT: Running pass: LoopSimplifyPass on foo
Expand All @@ -217,6 +220,9 @@
; CHECK-POSTLINK-O2-NEXT: Running pass: CorrelatedValuePropagationPass
; CHECK-POSTLINK-O2-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-POSTLINK-O2-NEXT: Running pass: InstCombinePass
; CHECK-POSTLINK-O3-NEXT: Running pass: LoopSimplifyPass
; CHECK-POSTLINK-O3-NEXT: Running pass: LCSSAPass
; CHECK-POSTLINK-O3-NEXT: Running pass: TapirIndVarSimplifyPass
; CHECK-POSTLINK-O3-NEXT: Running pass: LoopStripMinePass on foo
; CHECK-POSTLINK-O3-NEXT: Running pass: TaskSimplifyPass on foo
; CHECK-POSTLINK-O3-NEXT: Running pass: LoopSimplifyPass on foo
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@
; CHECK-O-NEXT: Running pass: RecomputeGlobalsAAPass
; CHECK-O-NEXT: Running pass: Float2IntPass
; CHECK-O-NEXT: Running pass: LowerConstantIntrinsicsPass
; CHECK-O2-NEXT: Running pass: LoopSimplifyPass
; CHECK-O2-NEXT: Running pass: LCSSAPass
; CHECK-O2-NEXT: Running pass: TapirIndVarSimplifyPass
; CHECK-O2-NEXT: Running pass: LoopStripMinePass
; CHECK-O2-NEXT: Running pass: TaskSimplifyPass
; CHECK-O2-NEXT: Running pass: LoopSimplifyPass
Expand All @@ -176,6 +179,9 @@
; CHECK-O2-NEXT: Running pass: CorrelatedValuePropagationPass
; CHECK-O2-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O2-NEXT: Running pass: InstCombinePass
; CHECK-O3-NEXT: Running pass: LoopSimplifyPass
; CHECK-O3-NEXT: Running pass: LCSSAPass
; CHECK-O3-NEXT: Running pass: TapirIndVarSimplifyPass
; CHECK-O3-NEXT: Running pass: LoopStripMinePass
; CHECK-O3-NEXT: Running pass: TaskSimplifyPass
; CHECK-O3-NEXT: Running pass: LoopSimplifyPass
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@
; CHECK-O-NEXT: Running pass: RecomputeGlobalsAAPass
; CHECK-O-NEXT: Running pass: Float2IntPass
; CHECK-O-NEXT: Running pass: LowerConstantIntrinsicsPass
; CHECK-O2-NEXT: Running pass: LoopSimplifyPass
; CHECK-O2-NEXT: Running pass: LCSSAPass
; CHECK-O2-NEXT: Running pass: TapirIndVarSimplifyPass
; CHECK-O2-NEXT: Running pass: LoopStripMinePass
; CHECK-O2-NEXT: Running pass: TaskSimplifyPass
; CHECK-O2-NEXT: Running pass: LoopSimplifyPass
Expand All @@ -188,6 +191,9 @@
; CHECK-O2-NEXT: Running pass: CorrelatedValuePropagationPass
; CHECK-O2-NEXT: Invalidating analysis: LazyValueAnalysis
; CHECK-O2-NEXT: Running pass: InstCombinePass
; CHECK-O3-NEXT: Running pass: LoopSimplifyPass
; CHECK-O3-NEXT: Running pass: LCSSAPass
; CHECK-O3-NEXT: Running pass: TapirIndVarSimplifyPass
; CHECK-O3-NEXT: Running pass: LoopStripMinePass
; CHECK-O3-NEXT: Running pass: TaskSimplifyPass
; CHECK-O3-NEXT: Running pass: LoopSimplifyPass
Expand Down
87 changes: 87 additions & 0 deletions llvm/test/Transforms/Tapir/missed-loop-opts-test.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
; Check that Tapir loops can be peeled and subsequently stripmined.
;
; RUN: opt < %s -passes='loop(loop-unroll-full),gvn,loop(tapir-indvars),loop-stripmine' -S | FileCheck %s

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: argmemonly norecurse nounwind uwtable
; Test input: a Tapir loop (detach/reattach within %syncreg, tagged with
; !llvm.loop !7) whose induction variable %__begin.0 starts at 0 and is
; incremented by 1 until it equals %size. The loop body handles iteration 0
; in if.then and every other iteration in if.else.
define dso_local void @test(i64 noundef %size, i64* nocapture noundef %data) local_unnamed_addr #0 {
entry:
%syncreg = tail call token @llvm.syncregion.start()
; Skip the loop entirely when %size is zero.
%cmp.not = icmp eq i64 %size, 0
br i1 %cmp.not, label %cleanup, label %pfor.cond.preheader

pfor.cond.preheader: ; preds = %entry
br label %pfor.cond

; The expectations below look for a peeled first iteration: the preheader
; detaches a ".peel" copy of the body whose iteration-0 test is the constant
; true, followed by an exit test that compares the constant 1 against %size.
; CHECK: pfor.cond.preheader:
; CHECK-NEXT: detach within %syncreg, label %pfor.body.peel, label %pfor.inc.peel

; CHECK: pfor.body.peel:
; CHECK-NEXT: br i1 true, label %if.then.peel, label %if.else.peel

; CHECK: pfor.inc.peel:
; CHECK-NEXT: %[[EXITCOND_PEEL:.+]] = icmp ne i64 1, %size
; CHECK-NEXT: br i1 %[[EXITCOND_PEEL]], label %[[PFOR_COND_AFTER_PEEL:.+]], label %pfor.cond.cleanup

pfor.cond: ; preds = %pfor.cond.preheader, %pfor.inc
%__begin.0 = phi i64 [ %inc, %pfor.inc ], [ 0, %pfor.cond.preheader ]
detach within %syncreg, label %pfor.body, label %pfor.inc

; The expectations below look for ".strpm.outer" blocks containing a detach
; and a back edge to pfor.cond.strpm.outer (presumably the outer loop emitted
; by the loop-stripmine pass named in the RUN line).
; CHECK: pfor.cond.strpm.outer:
; CHECK: detach within %syncreg, label %pfor.body.strpm.outer, label %pfor.inc.strpm.outer

; CHECK: pfor.inc.strpm.outer:
; CHECK: br i1 %{{.+}}, label %{{.+}} label %pfor.cond.strpm.outer

pfor.body: ; preds = %pfor.cond
; Iteration 0 takes if.then; all other iterations take if.else.
%cmp3 = icmp eq i64 %__begin.0, 0
br i1 %cmp3, label %if.then, label %if.else

if.then: ; preds = %pfor.body
; Shift the value at %data left by 2 and store it back.
%0 = load i64, i64* %data, align 8, !tbaa !3
%mul4 = shl i64 %0, 2
store i64 %mul4, i64* %data, align 8, !tbaa !3
br label %pfor.preattach

if.else: ; preds = %pfor.body
; Shift the element at index %__begin.0 of %data left by 1 and store it back.
%arrayidx5 = getelementptr inbounds i64, i64* %data, i64 %__begin.0
%1 = load i64, i64* %arrayidx5, align 8, !tbaa !3
%mul6 = shl i64 %1, 1
store i64 %mul6, i64* %arrayidx5, align 8, !tbaa !3
br label %pfor.preattach

pfor.preattach: ; preds = %if.then, %if.else
reattach within %syncreg, label %pfor.inc

pfor.inc: ; preds = %pfor.preattach, %pfor.cond
; Advance the induction variable; branch back to pfor.cond until %inc equals %size.
%inc = add nuw i64 %__begin.0, 1
%exitcond = icmp ne i64 %inc, %size
br i1 %exitcond, label %pfor.cond, label %pfor.cond.cleanup, !llvm.loop !7

pfor.cond.cleanup: ; preds = %pfor.inc
sync within %syncreg, label %cleanup

cleanup: ; preds = %pfor.cond.cleanup, %entry
ret void
}

; Function Attrs: argmemonly mustprogress nounwind willreturn
declare token @llvm.syncregion.start() #1

attributes #0 = { argmemonly norecurse nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { argmemonly mustprogress nounwind willreturn }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"uwtable", i32 1}
!2 = !{!"clang version 14.0.5 (git@github.com:OpenCilk/opencilk-project.git b924d7dea48d317137eb07823b63f9c5fbfbf341)"}
!3 = !{!4, !4, i64 0}
!4 = !{!"long", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
!7 = distinct !{!7, !8}
!8 = !{!"tapir.loop.spawn.strategy", i32 1}

0 comments on commit 5ec3d08

Please sign in to comment.