@@ -1805,8 +1805,7 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
18051805// / }
18061806static bool isSafeDependenceDistance (const DataLayout &DL, ScalarEvolution &SE,
18071807 const SCEV &MaxBTC, const SCEV &Dist,
1808- uint64_t MaxStride,
1809- uint64_t TypeByteSize) {
1808+ uint64_t MaxStride) {
18101809
18111810 // If we can prove that
18121811 // (**) |Dist| > MaxBTC * Step
@@ -1825,8 +1824,7 @@ static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
18251824 // will be executed only if LoopCount >= VF, proving distance >= LoopCount
18261825 // also guarantees that distance >= VF.
18271826 //
1828- const uint64_t ByteStride = MaxStride * TypeByteSize;
1829- const SCEV *Step = SE.getConstant (MaxBTC.getType (), ByteStride);
1827+ const SCEV *Step = SE.getConstant (MaxBTC.getType (), MaxStride);
18301828 const SCEV *Product = SE.getMulExpr (&MaxBTC, Step);
18311829
18321830 const SCEV *CastedDist = &Dist;
@@ -1870,9 +1868,7 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
18701868 if (Distance % TypeByteSize)
18711869 return false ;
18721870
1873- uint64_t ScaledDist = Distance / TypeByteSize;
1874-
1875- // No dependence if the scaled distance is not multiple of the stride.
1871+ // No dependence if the distance is not multiple of the stride.
18761872 // E.g.
18771873 // for (i = 0; i < 1024 ; i += 4)
18781874 // A[i+2] = A[i] + 1;
@@ -1888,7 +1884,7 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
18881884 // Two accesses in memory (scaled distance is 4, stride is 3):
18891885 // | A[0] | | | A[3] | | | A[6] | | |
18901886 // | | | | | A[4] | | | A[7] | |
1891- return ScaledDist % Stride;
1887+ return Distance % Stride;
18921888}
18931889
18941890std::variant<MemoryDepChecker::Dependence::DepType,
@@ -1927,6 +1923,7 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19271923 if (StrideAPtr && *StrideAPtr < 0 ) {
19281924 std::swap (Src, Sink);
19291925 std::swap (AInst, BInst);
1926+ std::swap (ATy, BTy);
19301927 std::swap (StrideAPtr, StrideBPtr);
19311928 }
19321929
@@ -1970,31 +1967,51 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize(
19701967 return MemoryDepChecker::Dependence::IndirectUnsafe;
19711968 }
19721969
1973- int64_t StrideAPtrInt = *StrideAPtr;
1974- int64_t StrideBPtrInt = *StrideBPtr;
1975- LLVM_DEBUG (dbgs () << " LAA: Src induction step: " << StrideAPtrInt
1976- << " Sink induction step: " << StrideBPtrInt << " \n " );
1970+ LLVM_DEBUG (dbgs () << " LAA: Src induction step: " << *StrideAPtr
1971+ << " Sink induction step: " << *StrideBPtr << " \n " );
1972+
1973+ // Note that store size is different from alloc size, which is dependent on
1974+ // store size. We use the former for checking illegal cases, and the latter
1975+ // for scaling strides.
1976+ TypeSize AStoreSz = DL.getTypeStoreSize (ATy),
1977+ BStoreSz = DL.getTypeStoreSize (BTy);
1978+
1979+ // When the distance is zero, we're reading/writing the same memory location:
1980+ // check that the store sizes are equal. Otherwise, fail with an unknown
1981+ // dependence for which we should not generate runtime checks.
1982+ if (Dist->isZero () && AStoreSz != BStoreSz)
1983+ return MemoryDepChecker::Dependence::Unknown;
1984+
1985+ // We can't get get a uint64_t for the AllocSize if either of the store sizes
1986+ // are scalable.
1987+ if (AStoreSz.isScalable () || BStoreSz.isScalable ())
1988+ return MemoryDepChecker::Dependence::Unknown;
1989+
1990+ // The TypeByteSize is used to scale Distance and VF. In these contexts, the
1991+ // only size that matters is the size of the Sink.
1992+ uint64_t ASz = alignTo (AStoreSz, DL.getABITypeAlign (ATy).value ()),
1993+ TypeByteSize = alignTo (BStoreSz, DL.getABITypeAlign (BTy).value ());
1994+
1995+ // We scale the strides by the alloc-type-sizes, so we can check that the
1996+ // common distance is equal when ASz != BSz.
1997+ int64_t StrideAScaled = *StrideAPtr * ASz;
1998+ int64_t StrideBScaled = *StrideBPtr * TypeByteSize;
1999+
19772000 // At least Src or Sink are loop invariant and the other is strided or
19782001 // invariant. We can generate a runtime check to disambiguate the accesses.
1979- if (StrideAPtrInt == 0 || StrideBPtrInt == 0 )
2002+ if (!StrideAScaled || !StrideBScaled )
19802003 return MemoryDepChecker::Dependence::Unknown;
19812004
19822005 // Both Src and Sink have a constant stride, check if they are in the same
19832006 // direction.
1984- if ((StrideAPtrInt > 0 && StrideBPtrInt < 0 ) ||
1985- (StrideAPtrInt < 0 && StrideBPtrInt > 0 )) {
2007+ if (StrideAScaled > 0 != StrideBScaled > 0 ) {
19862008 LLVM_DEBUG (
19872009 dbgs () << " Pointer access with strides in different directions\n " );
19882010 return MemoryDepChecker::Dependence::Unknown;
19892011 }
19902012
1991- uint64_t TypeByteSize = DL.getTypeAllocSize (ATy);
1992- bool HasSameSize =
1993- DL.getTypeStoreSizeInBits (ATy) == DL.getTypeStoreSizeInBits (BTy);
1994- if (!HasSameSize)
1995- TypeByteSize = 0 ;
1996- return DepDistanceStrideAndSizeInfo (Dist, std::abs (StrideAPtrInt),
1997- std::abs (StrideBPtrInt), TypeByteSize,
2013+ return DepDistanceStrideAndSizeInfo (Dist, std::abs (StrideAScaled),
2014+ std::abs (StrideBScaled), TypeByteSize,
19982015 AIsWrite, BIsWrite);
19992016}
20002017
@@ -2012,15 +2029,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20122029
20132030 auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
20142031 std::get<DepDistanceStrideAndSizeInfo>(Res);
2015- bool HasSameSize = TypeByteSize > 0 ;
20162032
20172033 std::optional<uint64_t > CommonStride =
20182034 StrideA == StrideB ? std::make_optional (StrideA) : std::nullopt ;
20192035 if (isa<SCEVCouldNotCompute>(Dist)) {
2020- // TODO: Relax requirement that there is a common stride to retry with
2021- // non-constant distance dependencies.
2022- FoundNonConstantDistanceDependence |= CommonStride.has_value ();
20232036 LLVM_DEBUG (dbgs () << " LAA: Dependence because of uncomputable distance.\n " );
2037+ FoundNonConstantDistanceDependence = true ;
20242038 return Dependence::Unknown;
20252039 }
20262040
@@ -2033,24 +2047,20 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20332047 // upper bound of the number of iterations), the accesses are independet, i.e.
20342048 // they are far enough appart that accesses won't access the same location
20352049 // across all loop ierations.
2036- if (HasSameSize && isSafeDependenceDistance (
2037- DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount ()),
2038- *Dist, MaxStride, TypeByteSize))
2050+ if (isSafeDependenceDistance (
2051+ DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount ()), *Dist, MaxStride))
20392052 return Dependence::NoDep;
20402053
2041- const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
2054+ const SCEVConstant *ConstDist = dyn_cast<SCEVConstant>(Dist);
20422055
20432056 // Attempt to prove strided accesses independent.
2044- if (C) {
2045- const APInt &Val = C->getAPInt ();
2046- int64_t Distance = Val.getSExtValue ();
2057+ if (ConstDist) {
2058+ int64_t Distance = std::abs (ConstDist->getAPInt ().getSExtValue ());
20472059
20482060 // If the distance between accesses and their strides are known constants,
20492061 // check whether the accesses interlace each other.
2050- if (std::abs (Distance) > 0 && CommonStride && *CommonStride > 1 &&
2051- HasSameSize &&
2052- areStridedAccessesIndependent (std::abs (Distance), *CommonStride,
2053- TypeByteSize)) {
2062+ if (Distance > 0 && CommonStride && CommonStride > 1 &&
2063+ areStridedAccessesIndependent (Distance, *CommonStride, TypeByteSize)) {
20542064 LLVM_DEBUG (dbgs () << " LAA: Strided accesses are independent\n " );
20552065 return Dependence::NoDep;
20562066 }
@@ -2063,15 +2073,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20632073
20642074 // Negative distances are not plausible dependencies.
20652075 if (SE.isKnownNonPositive (Dist)) {
2066- if (SE.isKnownNonNegative (Dist)) {
2067- if (HasSameSize) {
2068- // Write to the same location with the same size.
2069- return Dependence::Forward;
2070- }
2071- LLVM_DEBUG (dbgs () << " LAA: possibly zero dependence difference but "
2072- " different type sizes\n " );
2073- return Dependence::Unknown;
2074- }
2076+ if (SE.isKnownNonNegative (Dist))
2077+ // Write to the same location.
2078+ return Dependence::Forward;
20752079
20762080 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
20772081 // Check if the first access writes to a location that is read in a later
@@ -2083,17 +2087,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
20832087 // forward dependency will allow vectorization using any width.
20842088
20852089 if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2086- if (!C) {
2087- // TODO: FoundNonConstantDistanceDependence is used as a necessary
2088- // condition to consider retrying with runtime checks. Historically, we
2089- // did not set it when strides were different but there is no inherent
2090- // reason to.
2091- FoundNonConstantDistanceDependence |= CommonStride.has_value ();
2090+ if (!ConstDist) {
2091+ FoundNonConstantDistanceDependence = true ;
20922092 return Dependence::Unknown;
20932093 }
2094- if (!HasSameSize ||
2095- couldPreventStoreLoadForward (C->getAPInt ().abs ().getZExtValue (),
2096- TypeByteSize)) {
2094+ if (couldPreventStoreLoadForward (
2095+ ConstDist->getAPInt ().abs ().getZExtValue (), TypeByteSize)) {
20972096 LLVM_DEBUG (
20982097 dbgs () << " LAA: Forward but may prevent st->ld forwarding\n " );
20992098 return Dependence::ForwardButPreventsForwarding;
@@ -2107,27 +2106,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21072106 int64_t MinDistance = SE.getSignedRangeMin (Dist).getSExtValue ();
21082107 // Below we only handle strictly positive distances.
21092108 if (MinDistance <= 0 ) {
2110- FoundNonConstantDistanceDependence |= CommonStride. has_value () ;
2109+ FoundNonConstantDistanceDependence = true ;
21112110 return Dependence::Unknown;
21122111 }
21132112
2114- if (!isa<SCEVConstant>(Dist)) {
2115- // Previously this case would be treated as Unknown, possibly setting
2116- // FoundNonConstantDistanceDependence to force re-trying with runtime
2117- // checks. Until the TODO below is addressed, set it here to preserve
2118- // original behavior w.r.t. re-trying with runtime checks.
2119- // TODO: FoundNonConstantDistanceDependence is used as a necessary
2120- // condition to consider retrying with runtime checks. Historically, we
2121- // did not set it when strides were different but there is no inherent
2122- // reason to.
2123- FoundNonConstantDistanceDependence |= CommonStride.has_value ();
2124- }
2125-
2126- if (!HasSameSize) {
2127- LLVM_DEBUG (dbgs () << " LAA: ReadWrite-Write positive dependency with "
2128- " different type sizes\n " );
2129- return Dependence::Unknown;
2130- }
2113+ if (!ConstDist)
2114+ FoundNonConstantDistanceDependence = true ;
21312115
21322116 if (!CommonStride)
21332117 return Dependence::Unknown;
@@ -2142,8 +2126,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21422126
21432127 // It's not vectorizable if the distance is smaller than the minimum distance
21442128 // needed for a vectroized/unrolled version. Vectorizing one iteration in
2145- // front needs TypeByteSize * Stride . Vectorizing the last iteration needs
2146- // TypeByteSize (No need to plus the last gap distance).
2129+ // front needs CommonStride . Vectorizing the last iteration needs TypeByteSize
2130+ // (No need to plus the last gap distance).
21472131 //
21482132 // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
21492133 // foo(int *A) {
@@ -2170,10 +2154,9 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
21702154 // We know that Dist is positive, but it may not be constant. Use the signed
21712155 // minimum for computations below, as this ensures we compute the closest
21722156 // possible dependence distance.
2173- uint64_t MinDistanceNeeded =
2174- TypeByteSize * *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
2157+ uint64_t MinDistanceNeeded = *CommonStride * (MinNumIter - 1 ) + TypeByteSize;
21752158 if (MinDistanceNeeded > static_cast <uint64_t >(MinDistance)) {
2176- if (!isa<SCEVConstant>(Dist) ) {
2159+ if (!ConstDist ) {
21772160 // For non-constant distances, we checked the lower bound of the
21782161 // dependence distance and the distance may be larger at runtime (and safe
21792162 // for vectorization). Classify it as Unknown, so we re-try with runtime
@@ -2228,12 +2211,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
22282211
22292212 // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
22302213 // since there is a backwards dependency.
2231- uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * * CommonStride) ;
2214+ uint64_t MaxVF = MinDepDistBytes / * CommonStride;
22322215 LLVM_DEBUG (dbgs () << " LAA: Positive min distance " << MinDistance
22332216 << " with max VF = " << MaxVF << ' \n ' );
22342217
22352218 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8 ;
2236- if (!isa<SCEVConstant>(Dist) && MaxVFInBits < MaxTargetVectorWidthInBits) {
2219+ if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
22372220 // For non-constant distances, we checked the lower bound of the dependence
22382221 // distance and the distance may be larger at runtime (and safe for
22392222 // vectorization). Classify it as Unknown, so we re-try with runtime checks.
0 commit comments