Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions llvm/lib/Passes/PassBuilderPipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,15 @@ static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(false),
cl::Hidden,
cl::desc("Enable the LoopFlatten Pass"));

// Experimentally allow loop header duplication. This should allow for better
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW I don't think allowing loop-rotation for all loops to simplify some to memsets is the best way forward here, as I don't see any path towards having this enabled by default.

If this ends up being profitable, we should drop this flag and
// making a code gen option that can be controlled independent of the opt level
// and exposed through clang.

It is not clear to me what profitable means here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the new wording should be less ambiguous. Happy to change it if you have a preference, though.

// optimization at Oz, since loop-idiom recognition can then recognize things
// like memcpy. If this ends up being useful for many targets, we should drop
// this flag and make a code generation option that can be controlled
// independent of the opt level and exposed through the frontend.
// Defaults to false. The pipeline builders in this file pass
// `EnableLoopHeaderDuplication || Level != OptimizationLevel::Oz` as the first
// LoopRotatePass constructor argument, so setting this flag requests header
// duplication even when the optimization level is Oz.
static cl::opt<bool> EnableLoopHeaderDuplication(
"enable-loop-header-duplication", cl::init(false), cl::Hidden,
cl::desc("Enable loop header duplication at any optimization level"));

static cl::opt<bool>
EnableDFAJumpThreading("enable-dfa-jump-thread",
cl::desc("Enable DFA jump threading"),
Expand Down Expand Up @@ -630,8 +639,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
/*AllowSpeculation=*/false));

// Disable header duplication in loop rotation at -Oz, unless it is
// explicitly requested via -enable-loop-header-duplication.
LPM1.addPass(
LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
Level != OptimizationLevel::Oz,
isLTOPreLink(Phase)));
// TODO: Investigate promotion cap for O1.
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
Expand Down Expand Up @@ -812,7 +822,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
// Disable header duplication in loop rotation at -Oz, unless it is
// explicitly requested via -enable-loop-header-duplication.
MPM.addPass(createModuleToFunctionPassAdaptor(
createFunctionToLoopPassAdaptor(
LoopRotatePass(Level != OptimizationLevel::Oz),
LoopRotatePass(EnableLoopHeaderDuplication ||
Level != OptimizationLevel::Oz),
/*UseMemorySSA=*/false,
/*UseBlockFrequencyInfo=*/false),
PTO.EagerlyInvalidateAnalyses));
Expand Down Expand Up @@ -1422,7 +1433,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
LoopPassManager LPM;
// First rotate loops that may have been un-rotated by prior passes.
// Disable header duplication at -Oz, unless it is explicitly requested via
// -enable-loop-header-duplication.
LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink));
LPM.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
Level != OptimizationLevel::Oz,
LTOPreLink));
// Some loops may have become dead by now. Try to delete them.
// FIXME: see discussion in https://reviews.llvm.org/D112851,
// this may need to be revisited once we run GVN before loop deletion
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/Transforms/LoopRotate/oz-disable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
; RUN: opt < %s -S -passes='default<Os>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
; RUN: opt < %s -S -passes='default<Oz>' -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ

;; Make sure -enable-loop-header-duplication overrides the default behavior at Oz
; RUN: opt < %s -S -passes='default<Oz>' -enable-loop-header-duplication -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS

; Loop should be rotated for -Os but not for -Oz.
; OS: rotating Loop at depth 1
; OZ-NOT: rotating Loop at depth 1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3

;; Check that -enable-loop-header-duplication at Oz enables certain types of
;; optimizations, for example replacing the whole loop with a call to memset.
;; If loop idiom recognition begins to recognize unrotated loops, this test
;; will need to be updated.

; RUN: opt -passes='default<Oz>' -S < %s | FileCheck %s --check-prefix=NOROTATION
; RUN: opt -passes='default<Oz>' -S -enable-loop-header-duplication < %s | FileCheck %s --check-prefix=ROTATION
; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s --check-prefix=ROTATION

;; Input loop: walks a byte pointer from %start until it equals %end, storing
;; the constant 1 through it on every iteration. The exit comparison lives in
;; the loop header (before the store), i.e. the loop is written in unrotated
;; form.
define void @test(i8* noalias nonnull align 1 %start, i8* %end) unnamed_addr {
; NOROTATION-LABEL: define void @test(
; NOROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
; NOROTATION-NEXT: entry:
; NOROTATION-NEXT: br label [[LOOP_HEADER:%.*]]
; NOROTATION: loop.header:
; NOROTATION-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; NOROTATION-NEXT: [[_12_I:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
; NOROTATION-NEXT: br i1 [[_12_I]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
; NOROTATION: loop.latch:
; NOROTATION-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
; NOROTATION-NEXT: store i8 1, ptr [[PTR_IV]], align 1
; NOROTATION-NEXT: br label [[LOOP_HEADER]]
; NOROTATION: exit:
; NOROTATION-NEXT: ret void
;
; ROTATION-LABEL: define void @test(
; ROTATION-SAME: ptr noalias nonnull writeonly align 1 [[START:%.*]], ptr readnone [[END:%.*]]) unnamed_addr #[[ATTR0:[0-9]+]] {
; ROTATION-NEXT: entry:
; ROTATION-NEXT: [[_12_I1:%.*]] = icmp eq ptr [[START]], [[END]]
; ROTATION-NEXT: br i1 [[_12_I1]], label [[EXIT:%.*]], label [[LOOP_LATCH_PREHEADER:%.*]]
; ROTATION: loop.latch.preheader:
; ROTATION-NEXT: [[END3:%.*]] = ptrtoint ptr [[END]] to i64
; ROTATION-NEXT: [[START4:%.*]] = ptrtoint ptr [[START]] to i64
; ROTATION-NEXT: [[TMP0:%.*]] = sub i64 [[END3]], [[START4]]
; ROTATION-NEXT: tail call void @llvm.memset.p0.i64(ptr nonnull align 1 [[START]], i8 1, i64 [[TMP0]], i1 false)
; ROTATION-NEXT: br label [[EXIT]]
; ROTATION: exit:
; ROTATION-NEXT: ret void
;
entry:
br label %loop.header

loop.header:
;; Pointer induction variable: %start on entry, %ptr.iv.next from the latch.
%ptr.iv = phi i8* [ %start, %entry ], [ %ptr.iv.next, %loop.latch ]
;; Leave the loop once the pointer reaches %end; otherwise run the latch.
%_12.i = icmp eq i8* %ptr.iv, %end
br i1 %_12.i, label %exit, label %loop.latch

loop.latch:
;; Compute the next position (one byte further), then store 1 at the current
;; position and branch back to the header.
%ptr.iv.next = getelementptr inbounds i8, i8* %ptr.iv, i64 1
store i8 1, i8* %ptr.iv, align 1
br label %loop.header

exit:
ret void
}