Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
1059eb7
[LAA] Always use DepCands when grouping runtime checks.
fhahn May 5, 2024
54ebebf
Merge branch 'main' into laa-unknown-dep-rt-checks
fhahn May 13, 2024
fc9cee7
Merge remote-tracking branch 'origin/main' into laa-unknown-dep-rt-ch…
fhahn May 22, 2024
2943e88
!fixup update new tests.
fhahn May 22, 2024
98e7a47
Merge remote-tracking branch 'origin/main' into laa-unknown-dep-rt-ch…
fhahn Jul 7, 2024
1059148
Merge remote-tracking branch 'origin/main' into laa-unknown-dep-rt-ch…
fhahn Jul 8, 2024
029a21d
Merge remote-tracking branch 'origin/main' into laa-unknown-dep-rt-ch…
fhahn Aug 17, 2025
6612413
Merge remote-tracking branch 'origin/main' into laa-unknown-dep-rt-ch…
fhahn Aug 19, 2025
9189e84
Merge remote-tracking branch 'origin/main' into laa-unknown-dep-rt-ch…
fhahn Aug 19, 2025
a12665a
Update after merge.
fhahn Aug 19, 2025
1772f07
Merge remote-tracking branch 'origin/main' into laa-unknown-dep-rt-ch…
fhahn Aug 20, 2025
9329501
!fixup update after updating to main.
fhahn Aug 20, 2025
6458ad6
Merge remote-tracking branch 'origin/main' into laa-unknown-dep-rt-ch…
fhahn Aug 21, 2025
b9673a5
!fixup clear DepCands if getDependences fails.
fhahn Aug 21, 2025
46e99a2
Merge remote-tracking branch 'origin/main' into laa-unknown-dep-rt-ch…
fhahn Aug 21, 2025
7b6d6b9
!fixup clear dependencies after no longer needed
fhahn Aug 21, 2025
e78769c
Merge remote-tracking branch 'origin/main' into laa-unknown-dep-rt-ch…
fhahn Sep 10, 2025
73825ac
!fixup address comments, thanks
fhahn Sep 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions llvm/include/llvm/ADT/EquivalenceClasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,19 @@ template <class ElemTy> class EquivalenceClasses {
return member_iterator(ECV.getLeader());
}

/// Erase the class containing \p V, i.e. erase all members of the class from
/// the set.
void eraseClass(const ElemTy &V) {
if (TheMapping.find(V) == TheMapping.end())
return;
iterator_range<member_iterator> LeaderI = members(V);
for (member_iterator MI = LeaderI.begin(), ME = LeaderI.end(); MI != ME;) {
const ElemTy &ToErase = *MI;
++MI;
TheMapping.erase(ToErase);
}
}

/// union - Merge the two equivalence sets for the specified values, inserting
/// them if they do not already exist in the equivalence set.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2) {
Expand Down
9 changes: 3 additions & 6 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -562,8 +562,7 @@ class RuntimePointerChecking {

/// Generate the checks and store it. This also performs the grouping
/// of pointers to reduce the number of memchecks necessary.
LLVM_ABI void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
bool UseDependencies);
LLVM_ABI void generateChecks(MemoryDepChecker::DepCandidates &DepCands);

/// Returns the checks that generateChecks created. They can be used to ensure
/// no read/write accesses overlap across all loop iterations.
Expand Down Expand Up @@ -630,10 +629,8 @@ class RuntimePointerChecking {
private:
/// Groups pointers such that a single memcheck is required
/// between two different groups. This will clear the CheckingGroups vector
/// and re-compute it. We will only group dependecies if \p UseDependencies
/// is true, otherwise we will create a separate group for each pointer.
void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
bool UseDependencies);
/// and re-compute it.
void groupChecks(MemoryDepChecker::DepCandidates &DepCands);

/// Generate the checks and return them.
SmallVector<RuntimePointerCheck, 4> generateChecks();
Expand Down
82 changes: 52 additions & 30 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,9 +526,9 @@ SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
}

void RuntimePointerChecking::generateChecks(
MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
MemoryDepChecker::DepCandidates &DepCands) {
assert(Checks.empty() && "Checks is not empty");
groupChecks(DepCands, UseDependencies);
groupChecks(DepCands);
Checks = generateChecks();
}

Expand Down Expand Up @@ -591,7 +591,7 @@ bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
}

void RuntimePointerChecking::groupChecks(
MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
MemoryDepChecker::DepCandidates &DepCands) {
// We build the groups from dependency candidates equivalence classes
// because:
// - We know that pointers in the same equivalence class share
Expand Down Expand Up @@ -628,19 +628,9 @@ void RuntimePointerChecking::groupChecks(
//
// In the above case, we have a non-constant distance and an Unknown
// dependence between accesses to the same underlying object, and could retry
// with runtime checks. Therefore UseDependencies is false. In this case we
// will use the fallback path and create separate checking groups for all
// pointers.

// If we don't have the dependency partitions, construct a new
// checking pointer group for each pointer. This is also required
// for correctness, because in this case we can have checking between
// pointers to the same underlying object.
if (!UseDependencies) {
for (unsigned I = 0; I < Pointers.size(); ++I)
CheckingGroups.emplace_back(I, *this);
return;
}
// with runtime checks without dependency information being available. In this
// case we will use the fallback path and create separate checking groups for
// accesses not present in DepCands.

unsigned TotalComparisons = 0;

Expand All @@ -664,6 +654,13 @@ void RuntimePointerChecking::groupChecks(
MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
Pointers[I].IsWritePtr);

// If there is no entry in the dependency partition, there are no potential
// accesses to merge; simply add a new pointer checking group.
if (!DepCands.contains(Access)) {
CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
continue;
}

SmallVector<RuntimeCheckingPtrGroup, 2> Groups;

// Because DepCands is constructed by visiting accesses in the order in
Expand Down Expand Up @@ -841,10 +838,12 @@ class AccessAnalysis {
/// (i.e. the pointers have computable bounds). A return value of false means
/// we couldn't analyze and generate runtime checks for all pointers in the
/// loop, but if \p AllowPartial is set then we will have checks for those
/// pointers we could analyze.
/// pointers we could analyze. \p DepChecker is used to remove unknown
/// dependences from DepCands.
bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, Loop *TheLoop,
const DenseMap<Value *, const SCEV *> &Strides,
Value *&UncomputablePtr, bool AllowPartial);
Value *&UncomputablePtr, bool AllowPartial,
const MemoryDepChecker &DepChecker);

/// Goes over all memory accesses, checks whether a RT check is needed
/// and builds sets of dependent accesses.
Expand Down Expand Up @@ -1290,7 +1289,7 @@ bool AccessAnalysis::createCheckForAccess(
// The id of the dependence set.
unsigned DepId;

if (isDependencyCheckNeeded()) {
if (DepCands.contains(Access)) {
Value *Leader = DepCands.getLeaderValue(Access).getPointer();
unsigned &LeaderId = DepSetId[Leader];
if (!LeaderId)
Expand All @@ -1312,15 +1311,36 @@ bool AccessAnalysis::createCheckForAccess(
bool AccessAnalysis::canCheckPtrAtRT(
RuntimePointerChecking &RtCheck, Loop *TheLoop,
const DenseMap<Value *, const SCEV *> &StridesMap, Value *&UncomputablePtr,
bool AllowPartial) {
bool AllowPartial, const MemoryDepChecker &DepChecker) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
bool CanDoRT = true;

bool MayNeedRTCheck = false;
if (!IsRTCheckAnalysisNeeded) return true;

bool IsDepCheckNeeded = isDependencyCheckNeeded();
if (auto *Deps = DepChecker.getDependences()) {
// If there are unknown dependences, this means runtime checks are needed to
// ensure there's no overlap between accesses to the same underlying object.
// Remove the equivalence classes containing both source and destination
// accesses from DepCands. This ensures runtime checks will be generated
// between those accesses and prevents them from being grouped together.
for (const auto &Dep : *Deps) {
if (Dep.Type != MemoryDepChecker::Dependence::Unknown) {
assert(MemoryDepChecker::Dependence::isSafeForVectorization(Dep.Type) ==
MemoryDepChecker::VectorizationSafetyStatus::Safe &&
"Should only skip safe dependences");
continue;
}
Instruction *Src = Dep.getSource(DepChecker);
Instruction *Dst = Dep.getDestination(DepChecker);
DepCands.eraseClass({getPointerOperand(Src), Src->mayWriteToMemory()});
DepCands.eraseClass({getPointerOperand(Dst), Dst->mayWriteToMemory()});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I correct that when both pointers have the same underlying object, only one of the eraseClass calls actually does anything (the second is a no-op)? This seems harmless; just confirming my understanding.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, if we already erased the class then this will be a no-op

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adding a comment here - or removing this line and adding a comment instead - would be helpful for future readers

}
} else {
CheckDeps.clear();
DepCands = {};
}

// We assign a consecutive id to access from different alias sets.
// Accesses between different groups doesn't need to be checked.
Expand Down Expand Up @@ -1447,7 +1467,7 @@ bool AccessAnalysis::canCheckPtrAtRT(
}

if (MayNeedRTCheck && (CanDoRT || AllowPartial))
RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
RtCheck.generateChecks(DepCands);

LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
<< " pointer comparisons.\n");
Expand Down Expand Up @@ -2721,8 +2741,9 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
Value *UncomputablePtr = nullptr;
HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(
*PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr, AllowPartial);
HasCompletePtrRtChecking =
Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,
UncomputablePtr, AllowPartial, getDepChecker());
if (!HasCompletePtrRtChecking) {
const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
recordAnalysis("CantIdentifyArrayBounds", I)
Expand All @@ -2744,16 +2765,13 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeChecks()) {
LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");

// Clear the dependency checks. We assume they are not needed.
Accesses.resetDepChecks(*DepChecker);

PtrRtChecking->reset();
PtrRtChecking->Need = true;

UncomputablePtr = nullptr;
HasCompletePtrRtChecking =
Accesses.canCheckPtrAtRT(*PtrRtChecking, TheLoop, SymbolicStrides,
UncomputablePtr, AllowPartial);
HasCompletePtrRtChecking = Accesses.canCheckPtrAtRT(
*PtrRtChecking, TheLoop, SymbolicStrides, UncomputablePtr,
AllowPartial, getDepChecker());

// Check that we found the bounds for the pointer.
if (!HasCompletePtrRtChecking) {
Expand All @@ -2763,6 +2781,10 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
return false;
}

// Clear the dependency checks. They are no longer needed.
Accesses.resetDepChecks(*DepChecker);

DepsAreSafe = true;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,20 +115,12 @@ define void @loads_of_same_pointer_with_different_sizes_retry_with_runtime_check
; CHECK-NEXT: %gep.B.iv = getelementptr inbounds i32, ptr %B, i64 %iv
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; CHECK-NEXT: Check 2:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.B.iv = getelementptr inbounds i32, ptr %B, i64 %iv
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; CHECK-NEXT: Check 3:
; CHECK-NEXT: Check 2:
; CHECK-NEXT: Comparing group GRP1:
; CHECK-NEXT: %gep.B.inc = getelementptr inbounds i32, ptr %B, i64 %inc
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; CHECK-NEXT: Check 4:
; CHECK-NEXT: Comparing group GRP1:
; CHECK-NEXT: %gep.B.inc = getelementptr inbounds i32, ptr %B, i64 %inc
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
Expand All @@ -138,11 +130,9 @@ define void @loads_of_same_pointer_with_different_sizes_retry_with_runtime_check
; CHECK-NEXT: (Low: ((4 * %off) + %B) High: ((4 * %N) + (4 * %off) + %B))
; CHECK-NEXT: Member: {((4 * %off) + %B),+,4}<%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: %A High: (%N + %A))
; CHECK-NEXT: Member: {%A,+,1}<nuw><%loop>
; CHECK-NEXT: Group GRP3:
; CHECK-NEXT: (Low: %A High: (3 + %N + %A))
; CHECK-NEXT: Member: {%A,+,1}<nuw><%loop>
; CHECK-NEXT: Member: {%A,+,1}<nuw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,9 @@ define void @dependency_check_and_runtime_checks_needed_select_of_invariant_ptrs
; CHECK-NEXT: Check 3:
; CHECK-NEXT: Comparing group GRP1:
; CHECK-NEXT: %select = select i1 %cmp, ptr %b, ptr %c
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %select = select i1 %cmp, ptr %b, ptr %c
; CHECK-NEXT: Check 4:
; CHECK-NEXT: Comparing group GRP1:
; CHECK-NEXT: %select = select i1 %cmp, ptr %b, ptr %c
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
; CHECK-NEXT: Check 5:
; CHECK-NEXT: Check 4:
; CHECK-NEXT: Comparing group GRP2:
; CHECK-NEXT: %select = select i1 %cmp, ptr %b, ptr %c
; CHECK-NEXT: Against group GRP3:
Expand Down Expand Up @@ -104,14 +99,9 @@ define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs(p
; CHECK-NEXT: Check 3:
; CHECK-NEXT: Comparing group GRP1:
; CHECK-NEXT: %select = select i1 %cmp, ptr %gep.b, ptr %gep.c
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %select = select i1 %cmp, ptr %gep.b, ptr %gep.c
; CHECK-NEXT: Check 4:
; CHECK-NEXT: Comparing group GRP1:
; CHECK-NEXT: %select = select i1 %cmp, ptr %gep.b, ptr %gep.c
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset
; CHECK-NEXT: Check 5:
; CHECK-NEXT: Check 4:
; CHECK-NEXT: Comparing group GRP2:
; CHECK-NEXT: %select = select i1 %cmp, ptr %gep.b, ptr %gep.c
; CHECK-NEXT: Against group GRP3:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@ define void @test_dependence_with_non_constant_offset_and_other_accesses_to_noal
; CHECK-NEXT: %gep.A.400 = getelementptr inbounds i32, ptr %A.off, i64 %iv
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; CHECK-NEXT: Check 1:
; CHECK-NEXT: Comparing group GRP2:
; CHECK-NEXT: %gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %gep.B.1 = getelementptr inbounds i8, ptr %B, i64 %iv.next
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: (%off + %A) High: (404 + %off + %A))
Expand All @@ -31,11 +26,9 @@ define void @test_dependence_with_non_constant_offset_and_other_accesses_to_noal
; CHECK-NEXT: (Low: %A High: (101 + %A))
; CHECK-NEXT: Member: {%A,+,1}<nuw><%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: %B High: (101 + %B))
; CHECK-NEXT: Member: {%B,+,1}<nuw><%loop>
; CHECK-NEXT: Group GRP3:
; CHECK-NEXT: (Low: (1 + %B)<nuw> High: (102 + %B))
; CHECK-NEXT: (Low: %B High: (102 + %B))
; CHECK-NEXT: Member: {(1 + %B)<nuw>,+,1}<nuw><%loop>
; CHECK-NEXT: Member: {%B,+,1}<nuw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
Expand Down Expand Up @@ -77,40 +70,30 @@ define void @test_dependence_with_non_constant_offset_and_other_accesses_to_maya
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.A.400 = getelementptr inbounds i32, ptr %A.off, i64 %iv
; CHECK-NEXT: Against group GRP1:
; CHECK-NEXT: %gep.B.1 = getelementptr inbounds i8, ptr %B, i64 %iv.next
; CHECK-NEXT: %gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
; CHECK-NEXT: Check 1:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.A.400 = getelementptr inbounds i32, ptr %A.off, i64 %iv
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; CHECK-NEXT: Check 2:
; CHECK-NEXT: Comparing group GRP0:
; CHECK-NEXT: %gep.A.400 = getelementptr inbounds i32, ptr %A.off, i64 %iv
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %gep.B.1 = getelementptr inbounds i8, ptr %B, i64 %iv.next
; CHECK-NEXT: Check 3:
; CHECK-NEXT: Comparing group GRP1:
; CHECK-NEXT: %gep.B.1 = getelementptr inbounds i8, ptr %B, i64 %iv.next
; CHECK-NEXT: %gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
; CHECK-NEXT: Against group GRP2:
; CHECK-NEXT: %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; CHECK-NEXT: Check 4:
; CHECK-NEXT: Comparing group GRP1:
; CHECK-NEXT: %gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
; CHECK-NEXT: Against group GRP3:
; CHECK-NEXT: %gep.B.1 = getelementptr inbounds i8, ptr %B, i64 %iv.next
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: (%off + %A) High: (404 + %off + %A))
; CHECK-NEXT: Member: {(%off + %A),+,4}<nw><%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %B High: (101 + %B))
; CHECK-NEXT: (Low: %B High: (102 + %B))
; CHECK-NEXT: Member: {(1 + %B)<nuw>,+,1}<nuw><%loop>
; CHECK-NEXT: Member: {%B,+,1}<nuw><%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: %A High: (101 + %A))
; CHECK-NEXT: Member: {%A,+,1}<nuw><%loop>
; CHECK-NEXT: Group GRP3:
; CHECK-NEXT: (Low: (1 + %B)<nuw> High: (102 + %B))
; CHECK-NEXT: Member: {(1 + %B)<nuw>,+,1}<nuw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
Expand Down
32 changes: 14 additions & 18 deletions llvm/test/Transforms/LoopVectorize/runtime-checks-difference.ll
Original file line number Diff line number Diff line change
Expand Up @@ -388,28 +388,24 @@ define void @use_diff_checks_when_retrying_with_rt_checks(i64 %off, ptr %dst, pt
; CHECK-LABEL: define void @use_diff_checks_when_retrying_with_rt_checks(
; CHECK-SAME: i64 [[OFF:%.*]], ptr [[DST:%.*]], ptr [[SRC:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[SRC2:%.*]] = ptrtoint ptr [[SRC]] to i64
; CHECK-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT: br i1 false, [[SCALAR_PH:label %.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[OFF]], -8
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[OFF]], 3
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[DST1]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[SRC2]]
; CHECK-NEXT: [[DIFF_CHECK3:%.*]] = icmp ult i64 [[TMP3]], 32
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK3]]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[SRC2]], 8
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[DST1]]
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], [[TMP1]]
; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP6]], 32
; CHECK-NEXT: [[CONFLICT_RDX5:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK4]]
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[DST1]], [[SRC2]]
; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP7]], 32
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]]
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 8000
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]]
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[DST]], i64 8000
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 8008
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
; CHECK-NEXT: [[CONFLICT_RDX5:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[SCEVGEP]], [[SCEVGEP3]]
; CHECK-NEXT: [[BOUND15:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP1]]
; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX5]], [[DIFF_CHECK6]]
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[DST1]], -8
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[SRC2]]
; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = icmp ult i64 [[TMP9]], 32
; CHECK-NEXT: [[BOUND07:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP3]]
; CHECK-NEXT: [[BOUND18:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP2]]
; CHECK-NEXT: [[DIFF_CHECK8:%.*]] = and i1 [[BOUND07]], [[BOUND18]]
; CHECK-NEXT: [[CONFLICT_RDX9:%.*]] = or i1 [[CONFLICT_RDX7]], [[DIFF_CHECK8]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX9]], [[SCALAR_PH]], [[VECTOR_PH:label %.*]]
;
Expand Down