diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index b1ba8e7c0f601..f65515ca38722 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1920,20 +1920,21 @@ isLoopVariantIndirectAddress(ArrayRef UnderlyingObjects, namespace { struct DepDistanceStrideAndSizeInfo { const SCEV *Dist; - uint64_t Stride; + uint64_t StrideA; + uint64_t StrideB; uint64_t TypeByteSize; bool AIsWrite; bool BIsWrite; - DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t Stride, - uint64_t TypeByteSize, bool AIsWrite, - bool BIsWrite) - : Dist(Dist), Stride(Stride), TypeByteSize(TypeByteSize), - AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} + DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA, + uint64_t StrideB, uint64_t TypeByteSize, + bool AIsWrite, bool BIsWrite) + : Dist(Dist), StrideA(StrideA), StrideB(StrideB), + TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {} }; } // namespace -// Get the dependence distance, stride, type size and whether it is a write for +// Get the dependence distance, strides, type size and whether it is a write for // the dependence between A and B. Returns a DepType, if we can prove there's // no dependence or the analysis fails. Outlined to lambda to limit he scope // of various temporary variables, like A/BPtr, StrideA/BPtr and others. @@ -1995,10 +1996,11 @@ getDependenceDistanceStrideAndSize( InnermostLoop)) return MemoryDepChecker::Dependence::IndirectUnsafe; - // Need accesses with constant stride. We don't want to vectorize - // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap - // in the address space. - if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) { + // Need accesses with constant strides and the same direction. We don't want + // to vectorize "A[B[i]] += ..." and similar code or pointer arithmetic that + // could wrap in the address space. + if (!StrideAPtr || !StrideBPtr || (StrideAPtr > 0 && StrideBPtr < 0) || + (StrideAPtr < 0 && StrideBPtr > 0)) { LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n"); return MemoryDepChecker::Dependence::Unknown; } @@ -2008,9 +2010,9 @@ getDependenceDistanceStrideAndSize( DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy); if (!HasSameSize) TypeByteSize = 0; - uint64_t Stride = std::abs(StrideAPtr); - return DepDistanceStrideAndSizeInfo(Dist, Stride, TypeByteSize, AIsWrite, - BIsWrite); + return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtr), + std::abs(StrideBPtr), TypeByteSize, + AIsWrite, BIsWrite); } MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( @@ -2028,41 +2030,63 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( if (std::holds_alternative(Res)) return std::get(Res); - const auto &[Dist, Stride, TypeByteSize, AIsWrite, BIsWrite] = + const auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] = std::get(Res); bool HasSameSize = TypeByteSize > 0; + std::optional CommonStride = + StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt; + if (isa(Dist)) { + // TODO: Relax requirement that there is a common stride to retry with + // non-constant distance dependencies. + FoundNonConstantDistanceDependence |= !!CommonStride; + LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n"); + return Dependence::Unknown; + } + ScalarEvolution &SE = *PSE.getSE(); auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); + // If the distance between the acecsses is larger than their absolute stride // multiplied by the backedge taken count, the accesses are independet, i.e. // they are far enough appart that accesses won't access the same location // across all loop ierations. - if (!isa(Dist) && HasSameSize && + if (HasSameSize && CommonStride && isSafeDependenceDistance(DL, SE, *(PSE.getBackedgeTakenCount()), *Dist, - Stride, TypeByteSize)) + *CommonStride, TypeByteSize)) return Dependence::NoDep; const SCEVConstant *C = dyn_cast(Dist); - if (!C) { - LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); - FoundNonConstantDistanceDependence = true; - return Dependence::Unknown; - } - const APInt &Val = C->getAPInt(); - int64_t Distance = Val.getSExtValue(); - - // If the distance between accesses and their strides are known constants, - // check whether the accesses interlace each other. - if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize && - areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) { - LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); - return Dependence::NoDep; + // Attempt to prove strided accesses independent. + if (C) { + const APInt &Val = C->getAPInt(); + int64_t Distance = Val.getSExtValue(); + + // If the distance between accesses and their strides are known constants, + // check whether the accesses interlace each other. + if (std::abs(Distance) > 0 && CommonStride && *CommonStride > 1 && + HasSameSize && + areStridedAccessesIndependent(std::abs(Distance), *CommonStride, + TypeByteSize)) { + LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); + return Dependence::NoDep; + } } // Negative distances are not plausible dependencies. - if (Val.isNegative()) { + if (SE.isKnownNonPositive(Dist)) { + if (SE.isKnownNonNegative(Dist)) { + if (HasSameSize) { + // Write to the same location with the same size. + return Dependence::Forward; + } else { + LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but " + "different type sizes\n"); + return Dependence::Unknown; + } + } + bool IsTrueDataDependence = (AIsWrite && !BIsWrite); // Check if the first access writes to a location that is read in a later // iteration, where the distance between them is not a multiple of a vector @@ -2071,27 +2095,40 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a // forward dependency will allow vectorization using any width. - if (IsTrueDataDependence && EnableForwardingConflictDetection && - (!HasSameSize || couldPreventStoreLoadForward(Val.abs().getZExtValue(), - TypeByteSize))) { - LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); - return Dependence::ForwardButPreventsForwarding; + + if (IsTrueDataDependence && EnableForwardingConflictDetection) { + if (!C) { + // TODO: FoundNonConstantDistanceDependence is used as a necessary + // condition to consider retrying with runtime checks. Historically, we + // did not set it when strides were different but there is no inherent + // reason to. + FoundNonConstantDistanceDependence |= CommonStride.has_value(); + return Dependence::Unknown; + } + if (!HasSameSize || + couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(), + TypeByteSize)) { + LLVM_DEBUG( + dbgs() << "LAA: Forward but may prevent st->ld forwarding\n"); + return Dependence::ForwardButPreventsForwarding; + } } LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n"); return Dependence::Forward; } - // Write to the same location with the same size. - if (Val == 0) { - if (HasSameSize) - return Dependence::Forward; - LLVM_DEBUG( - dbgs() << "LAA: Zero dependence difference but different type sizes\n"); + if (!C) { + // TODO: FoundNonConstantDistanceDependence is used as a necessary condition + // to consider retrying with runtime checks. Historically, we did not set it + // when strides were different but there is no inherent reason to. + FoundNonConstantDistanceDependence |= CommonStride.has_value(); + LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n"); return Dependence::Unknown; } - assert(Val.isStrictlyPositive() && "Expect a positive value"); + if (!SE.isKnownPositive(Dist)) + return Dependence::Unknown; if (!HasSameSize) { LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with " @@ -2099,6 +2136,14 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( return Dependence::Unknown; } + // The logic below currently only supports StrideA == StrideB, i.e. there's a + // common stride. + if (!CommonStride) + return Dependence::Unknown; + + const APInt &Val = C->getAPInt(); + int64_t Distance = Val.getSExtValue(); + // Bail out early if passed-in parameters make vectorization not feasible. unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? VectorizerParams::VectorizationFactor : 1); @@ -2134,7 +2179,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( // the minimum distance needed is 28, which is greater than distance. It is // not safe to do vectorization. uint64_t MinDistanceNeeded = - TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; + TypeByteSize * (*CommonStride) * (MinNumIter - 1) + TypeByteSize; if (MinDistanceNeeded > static_cast(Distance)) { LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance << '\n'); @@ -2183,7 +2228,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits // since there is a backwards dependency. - uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * Stride); + uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * (*CommonStride)); LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue() << " with max VF = " << MaxVF << '\n'); uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index edddfb1b92402..059900f357e64 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -126,8 +126,10 @@ struct StoreToLoadForwardingCandidate { // We don't need to check non-wrapping here because forward/backward // dependence wouldn't be valid if these weren't monotonic accesses. - auto *Dist = cast( + auto *Dist = dyn_cast( PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); + if (!Dist) + return false; const APInt &Val = Dist->getAPInt(); return Val == TypeByteSize * StrideLoad; } diff --git a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll index 932129bbb957f..5312c36e436a2 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/different-strides-safe-dep-due-to-backedge-taken-count.ll @@ -6,10 +6,9 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define void @forward_dep_known_safe_due_to_backedge_taken_count(ptr %A) { ; CHECK-LABEL: 'forward_dep_known_safe_due_to_backedge_taken_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: @@ -44,10 +43,9 @@ exit: define void @forward_dep_not_known_safe_due_to_backedge_taken_count(ptr %A) { ; CHECK-LABEL: 'forward_dep_not_known_safe_due_to_backedge_taken_count' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: diff --git a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll index 51755314896bb..5f4c732dc19df 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/non-constant-strides-forward.ll @@ -8,10 +8,9 @@ declare void @llvm.assume(i1) define void @different_non_constant_strides_known_forward(ptr %A) { ; CHECK-LABEL: 'different_non_constant_strides_known_forward' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: @@ -45,10 +44,9 @@ exit: define void @different_non_constant_strides_known_forward_min_distance_3(ptr %A) { ; CHECK-LABEL: 'different_non_constant_strides_known_forward_min_distance_3' ; CHECK-NEXT: loop: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Memory dependences are safe ; CHECK-NEXT: Dependences: -; CHECK-NEXT: Unknown: +; CHECK-NEXT: Forward: ; CHECK-NEXT: %l = load i32, ptr %gep.mul.2, align 4 -> ; CHECK-NEXT: store i32 %add, ptr %gep, align 4 ; CHECK-EMPTY: