-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
release/18.x: [SROA]: Only defer trying partial sized ptr or ptr vector types #86114
Merged
+115
−27
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-llvm-transforms Author: None (llvmbot) ChangesBackport 1e828f8 Requested by: @DianQK Full diff: https://github.com/llvm/llvm-project/pull/86114.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index bdbaf4f55c96d0..17a94f9381bf8e 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2257,6 +2257,41 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
return nullptr;
}
+static VectorType *createAndCheckVectorTypesForPromotion(
+ SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
+ function_ref<void(Type *)> CheckCandidateType, Partition &P,
+ const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
+ bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
+ bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
+ [[maybe_unused]] VectorType *OriginalElt =
+ CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
+ // Consider additional vector types where the element type size is a
+ // multiple of load/store element size.
+ for (Type *Ty : OtherTys) {
+ if (!VectorType::isValidElementType(Ty))
+ continue;
+ unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
+ // Make a copy of CandidateTys and iterate through it, because we
+ // might append to CandidateTys in the loop.
+ for (VectorType *const VTy : CandidateTysCopy) {
+ // The elements in the copy should remain invariant throughout the loop
+ assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
+ unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
+ unsigned ElementSize =
+ DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
+ if (TypeSize != VectorSize && TypeSize != ElementSize &&
+ VectorSize % TypeSize == 0) {
+ VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
+ CheckCandidateType(NewVTy);
+ }
+ }
+ }
+
+ return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
+ CommonEltTy, HaveVecPtrTy,
+ HaveCommonVecPtrTy, CommonVecPtrTy);
+}
+
/// Test whether the given alloca partitioning and range of slices can be
/// promoted to a vector.
///
@@ -2271,6 +2306,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// we have different element types.
SmallVector<VectorType *, 4> CandidateTys;
SetVector<Type *> LoadStoreTys;
+ SetVector<Type *> DeferredTys;
Type *CommonEltTy = nullptr;
VectorType *CommonVecPtrTy = nullptr;
bool HaveVecPtrTy = false;
@@ -2314,42 +2350,32 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
Ty = SI->getValueOperand()->getType();
else
continue;
+
+ auto CandTy = Ty->getScalarType();
+ if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
+ S.endOffset() != P.endOffset())) {
+ DeferredTys.insert(Ty);
+ continue;
+ }
+
LoadStoreTys.insert(Ty);
// Consider any loads or stores that are the exact size of the slice.
if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
CheckCandidateType(Ty);
}
- if (auto *VTy = checkVectorTypesForPromotion(
- P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
+ SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
+ if (auto *VTy = createAndCheckVectorTypesForPromotion(
+ LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
+ CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
HaveCommonVecPtrTy, CommonVecPtrTy))
return VTy;
- // Consider additional vector types where the element type size is a
- // multiple of load/store element size.
- for (Type *Ty : LoadStoreTys) {
- if (!VectorType::isValidElementType(Ty))
- continue;
- unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
- // Make a copy of CandidateTys and iterate through it, because we might
- // append to CandidateTys in the loop.
- SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
- CandidateTys.clear();
- for (VectorType *&VTy : CandidateTysCopy) {
- unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
- unsigned ElementSize =
- DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
- if (TypeSize != VectorSize && TypeSize != ElementSize &&
- VectorSize % TypeSize == 0) {
- VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
- CheckCandidateType(NewVTy);
- }
- }
- }
-
- return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
- CommonEltTy, HaveVecPtrTy,
- HaveCommonVecPtrTy, CommonVecPtrTy);
+ CandidateTys.clear();
+ return createAndCheckVectorTypesForPromotion(
+ DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
+ HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
+ CommonVecPtrTy);
}
/// Test whether a slice of an alloca is valid for integer widening.
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index ee35a0fd5fea65..f0e605fb4ad5c9 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -1388,6 +1388,68 @@ define <4 x ptr> @ptrLoadStoreTysPtr(ptr %init, i64 %val2) {
ret <4 x ptr> %sroaval
}
+define <4 x i32> @validLoadStoreTy([2 x i64] %cond.coerce) {
+; CHECK-LABEL: @validLoadStoreTy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0
+; CHECK-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0
+; CHECK-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1
+; CHECK-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP0]]
+;
+; DEBUG-LABEL: @validLoadStoreTy(
+; DEBUG-NEXT: entry:
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG557:![0-9]+]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG558:![0-9]+]]
+; DEBUG-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0, !dbg [[DBG559:![0-9]+]]
+; DEBUG-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0, !dbg [[DBG559]]
+; DEBUG-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1, !dbg [[DBG559]]
+; DEBUG-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1, !dbg [[DBG559]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560:![0-9]+]]
+; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>, !dbg [[DBG561:![0-9]+]]
+; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[TMP0]], metadata [[META556:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]]
+; DEBUG-NEXT: ret <4 x i32> [[TMP0]], !dbg [[DBG562:![0-9]+]]
+;
+entry:
+ %cond = alloca <4 x i32>, align 8
+ %coerce.dive2 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
+ store [2 x i64] %cond.coerce, ptr %coerce.dive2, align 8
+ %m5 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
+ %0 = load <4 x i32>, ptr %m5, align 8
+ ret <4 x i32> %0
+}
+
+; The following test should not crash the compiler
+; (calls to CheckCandidateType from createAndCheckVectorTypesForPromotion may change the memory to hold CandidateTys.data())
+define noundef zeroext i1 @CandidateTysRealloc() personality ptr null {
+entry:
+ %alloca = alloca <4x i32>, align 16
+ store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %alloca, align 16
+ br label %bb.1
+
+bb.1:
+ br label %bb.1
+
+bb.2:
+ %Load0 = load <4 x i32>, ptr %alloca, align 16
+ store <4 x i32> zeroinitializer, ptr %alloca, align 16
+ %Load1 = load <4 x i32>, ptr %alloca, align 16
+ br label %bb.3
+
+bb.3:
+ br label %bb.3
+
+bb.4:
+ %Load2 = load i64, ptr %alloca, align 16
+ %Load3 = load <4 x i32>, ptr %alloca, align 16
+ store <4 x i32> zeroinitializer, ptr %alloca, align 16
+ br label %bb.5
+
+bb.5:
+ br label %bb.5
+}
+
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
declare void @llvm.lifetime.end.p0(i64, ptr)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
jrbyrnes
approved these changes
Mar 21, 2024
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Loading status checks…
Change-Id: Ic77f87290905addadd5819dff2d0c62f031022ab (cherry picked from commit 1e828f8)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Backport 1e828f8
Requested by: @dianqk