Skip to content

Commit e20f2db

Browse files
jrbyrnes authored and llvmbot committed
[SROA]: Only defer trying partial sized ptr or ptr vector types
Change-Id: Ic77f87290905addadd5819dff2d0c62f031022ab (cherry picked from commit 1e828f8)
1 parent 26a1d66 commit e20f2db

File tree

2 files changed

+115
-27
lines changed

2 files changed

+115
-27
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

+53-27
Original file line numberDiff line numberDiff line change
@@ -2257,6 +2257,41 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
22572257
return nullptr;
22582258
}
22592259

2260+
/// Derive additional candidate vector types and then test for promotion.
///
/// For every type in \p OtherTys that is a valid vector element type, and for
/// every vector in \p CandidateTysCopy, a new vector type with that element
/// type and the same total bit size is built when the vector size is an exact
/// multiple of the type size (and the type size matches neither the vector
/// size nor the existing element size). Each new type is handed to
/// \p CheckCandidateType. Afterwards the result of
/// checkVectorTypesForPromotion on \p CandidateTys is returned; the remaining
/// reference parameters are forwarded to that call unchanged.
static VectorType *createAndCheckVectorTypesForPromotion(
2261+
SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2262+
function_ref<void(Type *)> CheckCandidateType, Partition &P,
2263+
const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
2264+
bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2265+
bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy) {
2266+
// Remember the first element of the copy so the assert in the loop below can
// verify that the copy is not modified while it is being iterated.
[[maybe_unused]] VectorType *OriginalElt =
2267+
CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2268+
// Consider additional vector types where the element type size is a
2269+
// multiple of load/store element size.
2270+
for (Type *Ty : OtherTys) {
2271+
if (!VectorType::isValidElementType(Ty))
2272+
continue;
2273+
unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2274+
// Iterate over the caller-supplied CandidateTysCopy rather than
2275+
// CandidateTys, because we might append to CandidateTys in the loop.
2276+
for (VectorType *const VTy : CandidateTysCopy) {
2277+
// The elements in the copy should remain invariant throughout the loop
2278+
assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2279+
unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2280+
unsigned ElementSize =
2281+
DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2282+
if (TypeSize != VectorSize && TypeSize != ElementSize &&
2283+
VectorSize % TypeSize == 0) {
2284+
VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2285+
CheckCandidateType(NewVTy);
2286+
}
2287+
}
2288+
}
2289+
2290+
return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
2291+
CommonEltTy, HaveVecPtrTy,
2292+
HaveCommonVecPtrTy, CommonVecPtrTy);
2293+
}
2294+
22602295
/// Test whether the given alloca partitioning and range of slices can be
22612296
/// promoted to a vector.
22622297
///
@@ -2271,6 +2306,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
22712306
// we have different element types.
22722307
SmallVector<VectorType *, 4> CandidateTys;
22732308
SetVector<Type *> LoadStoreTys;
2309+
SetVector<Type *> DeferredTys;
22742310
Type *CommonEltTy = nullptr;
22752311
VectorType *CommonVecPtrTy = nullptr;
22762312
bool HaveVecPtrTy = false;
@@ -2314,42 +2350,32 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
23142350
Ty = SI->getValueOperand()->getType();
23152351
else
23162352
continue;
2353+
2354+
auto CandTy = Ty->getScalarType();
2355+
if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2356+
S.endOffset() != P.endOffset())) {
2357+
DeferredTys.insert(Ty);
2358+
continue;
2359+
}
2360+
23172361
LoadStoreTys.insert(Ty);
23182362
// Consider any loads or stores that are the exact size of the slice.
23192363
if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
23202364
CheckCandidateType(Ty);
23212365
}
23222366

2323-
if (auto *VTy = checkVectorTypesForPromotion(
2324-
P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2367+
SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2368+
if (auto *VTy = createAndCheckVectorTypesForPromotion(
2369+
LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2370+
CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
23252371
HaveCommonVecPtrTy, CommonVecPtrTy))
23262372
return VTy;
23272373

2328-
// Consider additional vector types where the element type size is a
2329-
// multiple of load/store element size.
2330-
for (Type *Ty : LoadStoreTys) {
2331-
if (!VectorType::isValidElementType(Ty))
2332-
continue;
2333-
unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2334-
// Make a copy of CandidateTys and iterate through it, because we might
2335-
// append to CandidateTys in the loop.
2336-
SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2337-
CandidateTys.clear();
2338-
for (VectorType *&VTy : CandidateTysCopy) {
2339-
unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2340-
unsigned ElementSize =
2341-
DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2342-
if (TypeSize != VectorSize && TypeSize != ElementSize &&
2343-
VectorSize % TypeSize == 0) {
2344-
VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2345-
CheckCandidateType(NewVTy);
2346-
}
2347-
}
2348-
}
2349-
2350-
return checkVectorTypesForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
2351-
CommonEltTy, HaveVecPtrTy,
2352-
HaveCommonVecPtrTy, CommonVecPtrTy);
2374+
CandidateTys.clear();
2375+
return createAndCheckVectorTypesForPromotion(
2376+
DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2377+
HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2378+
CommonVecPtrTy);
23532379
}
23542380

23552381
/// Test whether a slice of an alloca is valid for integer widening.

llvm/test/Transforms/SROA/vector-promotion.ll

+62
Original file line numberDiff line numberDiff line change
@@ -1388,6 +1388,68 @@ define <4 x ptr> @ptrLoadStoreTysPtr(ptr %init, i64 %val2) {
13881388
ret <4 x ptr> %sroaval
13891389
}
13901390

1391+
define <4 x i32> @validLoadStoreTy([2 x i64] %cond.coerce) {
1392+
; CHECK-LABEL: @validLoadStoreTy(
1393+
; CHECK-NEXT: entry:
1394+
; CHECK-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0
1395+
; CHECK-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0
1396+
; CHECK-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1
1397+
; CHECK-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1
1398+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>
1399+
; CHECK-NEXT: ret <4 x i32> [[TMP0]]
1400+
;
1401+
; DEBUG-LABEL: @validLoadStoreTy(
1402+
; DEBUG-NEXT: entry:
1403+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG557:![0-9]+]]
1404+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG558:![0-9]+]]
1405+
; DEBUG-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0, !dbg [[DBG559:![0-9]+]]
1406+
; DEBUG-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0, !dbg [[DBG559]]
1407+
; DEBUG-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1, !dbg [[DBG559]]
1408+
; DEBUG-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1, !dbg [[DBG559]]
1409+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560:![0-9]+]]
1410+
; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>, !dbg [[DBG561:![0-9]+]]
1411+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[TMP0]], metadata [[META556:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]]
1412+
; DEBUG-NEXT: ret <4 x i32> [[TMP0]], !dbg [[DBG562:![0-9]+]]
1413+
;
1414+
entry:
1415+
%cond = alloca <4 x i32>, align 8
1416+
%coerce.dive2 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
1417+
store [2 x i64] %cond.coerce, ptr %coerce.dive2, align 8
1418+
%m5 = getelementptr inbounds <4 x i32>, ptr %cond, i32 0, i32 0
1419+
%0 = load <4 x i32>, ptr %m5, align 8
1420+
ret <4 x i32> %0
1421+
}
1422+
1423+
; The following test should not crash the compiler
1424+
; (calls to CheckCandidateType from createAndCheckVectorTypesForPromotion may reallocate the memory backing CandidateTys.data())
1425+
define noundef zeroext i1 @CandidateTysRealloc() personality ptr null {
1426+
entry:
1427+
%alloca = alloca <4x i32>, align 16
1428+
store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %alloca, align 16
1429+
br label %bb.1
1430+
1431+
bb.1:
1432+
br label %bb.1
1433+
1434+
bb.2:
1435+
%Load0 = load <4 x i32>, ptr %alloca, align 16
1436+
store <4 x i32> zeroinitializer, ptr %alloca, align 16
1437+
%Load1 = load <4 x i32>, ptr %alloca, align 16
1438+
br label %bb.3
1439+
1440+
bb.3:
1441+
br label %bb.3
1442+
1443+
bb.4:
1444+
%Load2 = load i64, ptr %alloca, align 16
1445+
%Load3 = load <4 x i32>, ptr %alloca, align 16
1446+
store <4 x i32> zeroinitializer, ptr %alloca, align 16
1447+
br label %bb.5
1448+
1449+
bb.5:
1450+
br label %bb.5
1451+
}
1452+
13911453
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
13921454
declare void @llvm.lifetime.end.p0(i64, ptr)
13931455
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:

0 commit comments

Comments
 (0)