Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LAA] Hoist setting condition for RT-checks #128045

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,6 @@ class MemoryDepChecker {
uint64_t MaxStride;
std::optional<uint64_t> CommonStride;

bool ShouldRetryWithRuntimeCheck;

/// TypeByteSize is either the common store size of both accesses, or 0 when
/// store sizes mismatch.
uint64_t TypeByteSize;
Expand All @@ -383,11 +381,9 @@ class MemoryDepChecker {

DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t MaxStride,
std::optional<uint64_t> CommonStride,
bool ShouldRetryWithRuntimeCheck,
uint64_t TypeByteSize, bool AIsWrite,
bool BIsWrite)
: Dist(Dist), MaxStride(MaxStride), CommonStride(CommonStride),
ShouldRetryWithRuntimeCheck(ShouldRetryWithRuntimeCheck),
TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
};

Expand Down
42 changes: 9 additions & 33 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2009,14 +2009,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
if (StrideAScaled == StrideBScaled)
CommonStride = StrideAScaled;

// TODO: Historically, we don't retry with runtime checks unless the
// (unscaled) strides are the same. Fix this once the condition for runtime
// checks in isDependent is fixed.
bool ShouldRetryWithRuntimeCheck = StrideAPtrInt == StrideBPtrInt;
// TODO: FoundNonConstantDistanceDependence is used as a necessary condition
// to consider retrying with runtime checks. Historically, we did not set it
// when (unscaled) strides were different but there is no inherent reason to.
if (!isa<SCEVConstant>(Dist))
FoundNonConstantDistanceDependence |= StrideAPtrInt == StrideBPtrInt;

return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
ShouldRetryWithRuntimeCheck, TypeByteSize,
AIsWrite, BIsWrite);
TypeByteSize, AIsWrite, BIsWrite);
}

MemoryDepChecker::Dependence::DepType
Expand All @@ -2031,15 +2031,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
if (std::holds_alternative<Dependence::DepType>(Res))
return std::get<Dependence::DepType>(Res);

auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck,
TypeByteSize, AIsWrite, BIsWrite] =
auto &[Dist, MaxStride, CommonStride, TypeByteSize, AIsWrite, BIsWrite] =
std::get<DepDistanceStrideAndSizeInfo>(Res);
bool HasSameSize = TypeByteSize > 0;

if (isa<SCEVCouldNotCompute>(Dist)) {
// TODO: Relax requirement that there is a common unscaled stride to retry
// with non-constant distance dependencies.
FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
return Dependence::Unknown;
}
Expand Down Expand Up @@ -2099,14 +2095,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// forward dependency will allow vectorization using any width.

if (IsTrueDataDependence && EnableForwardingConflictDetection) {
if (!ConstDist) {
// TODO: FoundNonConstantDistanceDependence is used as a necessary
// condition to consider retrying with runtime checks. Historically, we
// did not set it when strides were different but there is no inherent
// reason to.
FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
if (!ConstDist)
return Dependence::Unknown;
}
if (!HasSameSize ||
couldPreventStoreLoadForward(
ConstDist->getAPInt().abs().getZExtValue(), TypeByteSize)) {
Expand All @@ -2122,22 +2112,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,

int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
// Below we only handle strictly positive distances.
if (MinDistance <= 0) {
FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
if (MinDistance <= 0)
return Dependence::Unknown;
}

if (!ConstDist) {
// Previously this case would be treated as Unknown, possibly setting
// FoundNonConstantDistanceDependence to force re-trying with runtime
// checks. Until the TODO below is addressed, set it here to preserve
// original behavior w.r.t. re-trying with runtime checks.
// TODO: FoundNonConstantDistanceDependence is used as a necessary
// condition to consider retrying with runtime checks. Historically, we
// did not set it when strides were different but there is no inherent
// reason to.
FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
}

if (!HasSameSize) {
LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,28 +320,34 @@ exit:
define void @retry_after_dep_check_with_unknown_offset(ptr %A, i32 %offset) {
; CHECK-LABEL: 'retry_after_dep_check_with_unknown_offset'
; CHECK-NEXT: loop:
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Memory dependences are safe with run-time checks
; CHECK-NEXT: Dependences:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %l.A = load float, ptr %A, align 4 ->
; CHECK-NEXT: store float 0.000000e+00, ptr %A.100.iv.offset.3, align 4
; CHECK-EMPTY:
; CHECK-NEXT: Unknown:
; CHECK-NEXT: %l.A = load float, ptr %A, align 4 ->
; CHECK-NEXT: store float %l.A, ptr %A.100.iv, align 8
; CHECK-EMPTY:
; CHECK-NEXT: Run-time memory checks:
; CHECK-NEXT: Check 0:
; CHECK-NEXT: Comparing group ([[GRP16:0x[0-9a-f]+]]):
; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv
; CHECK-NEXT: Against group ([[GRP17:0x[0-9a-f]+]]):
; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3
; CHECK-NEXT: Check 1:
; CHECK-NEXT: Comparing group ([[GRP16]]):
; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv
; CHECK-NEXT: Against group ([[GRP18:0x[0-9a-f]+]]):
; CHECK-NEXT: ptr %A
; CHECK-NEXT: Check 2:
; CHECK-NEXT: Comparing group ([[GRP17]]):
; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3
; CHECK-NEXT: Against group ([[GRP18]]):
; CHECK-NEXT: ptr %A
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group [[GRP16:0x[0-9a-f]+]]:
; CHECK-NEXT: (Low: %A High: (4 + %A))
; CHECK-NEXT: Member: %A
; CHECK-NEXT: Group [[GRP17:0x[0-9a-f]+]]:
; CHECK-NEXT: (Low: (100 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A) High: (96 + (16 * (zext i32 %offset to i64))<nuw><nsw> + %A))
; CHECK-NEXT: Member: {(100 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A),+,8}<%loop>
; CHECK-NEXT: Group [[GRP18:0x[0-9a-f]+]]:
; CHECK-NEXT: Group [[GRP16]]:
; CHECK-NEXT: (Low: (100 + %A) High: (96 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A))
; CHECK-NEXT: Member: {(100 + %A),+,8}<%loop>
; CHECK-NEXT: Group [[GRP17]]:
; CHECK-NEXT: (Low: (100 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A) High: (96 + (16 * (zext i32 %offset to i64))<nuw><nsw> + %A))
; CHECK-NEXT: Member: {(100 + (8 * (zext i32 %offset to i64))<nuw><nsw> + %A),+,8}<%loop>
; CHECK-NEXT: Group [[GRP18]]:
; CHECK-NEXT: (Low: %A High: (4 + %A))
; CHECK-NEXT: Member: %A
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
Expand Down