Skip to content

Commit

Permalink
[SROA] Try harder to find a vector promotion viable type when rewriting
Browse files Browse the repository at this point in the history
We are seeing significant performance loss when an alloca fails to get promoted
to a register. I have observed that this is due to the common type found when
attempting to rewrite partition users being unviable for promotion, whereas if
we had continued looking for a type, we would have found a subtype in the
original allocated type that would have enabled promotion. Thus, first check
whether the initial common type found is viable for promotion and, if not,
continue looking instead of stopping with the initial common type found.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D128073

Change-Id: I71cb958abec6c64b1689c813037f8aed4709d2b5
  • Loading branch information
vangthao95 authored and zhang2amd committed Sep 6, 2022
1 parent 52890b5 commit 3cf23f7
Show file tree
Hide file tree
Showing 2 changed files with 462 additions and 27 deletions.
78 changes: 51 additions & 27 deletions llvm/lib/Transforms/Scalar/SROA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1847,6 +1847,34 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
return true;
}

/// Decide whether \p VTy can serve as the promoted vector type for \p P.
///
/// This is the per-candidate check used by \c isVectorPromotionViable: the
/// candidate is accepted only if its element type is a whole number of bytes
/// wide and every slice of the partition, as well as every split slice tail,
/// passes \c isVectorPromotionViableForSlice.
///
/// \param P   The partition whose slices and split slice tails are examined.
/// \param VTy The candidate vector type.
/// \param DL  Data layout used to query type sizes.
/// \returns true if all slices are viable for \p VTy, false otherwise.
static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
                                        const DataLayout &DL) {
  const uint64_t ElementSizeInBits =
      DL.getTypeSizeInBits(VTy->getElementType()).getFixedSize();

  // LLVM vectors are defined as bitpacked, but element sizes that are not a
  // whole number of bytes are not supported here.
  if (ElementSizeInBits % 8 != 0)
    return false;
  assert((DL.getTypeSizeInBits(VTy).getFixedSize() % 8) == 0 &&
         "vector size not a multiple of element size?");

  // The per-slice check takes the element size in bytes.
  const uint64_t ElementSizeInBytes = ElementSizeInBits / 8;

  // Shared predicate so both slice ranges below are checked identically.
  auto IsSliceViable = [&](const Slice &Candidate) {
    return isVectorPromotionViableForSlice(P, Candidate, VTy,
                                           ElementSizeInBytes, DL);
  };

  // First the slices of the partition itself...
  for (const Slice &S : P) {
    if (!IsSliceViable(S))
      return false;
  }

  // ...then the split slice tails reported by the partition.
  for (const Slice *Tail : P.splitSliceTails()) {
    if (!IsSliceViable(*Tail))
      return false;
  }

  return true;
}

/// Test whether the given alloca partitioning and range of slices can be
/// promoted to a vector.
///
Expand Down Expand Up @@ -1939,31 +1967,8 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
CandidateTys.resize(1);
}

// Try each vector type, and return the one which works.
auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
uint64_t ElementSize =
DL.getTypeSizeInBits(VTy->getElementType()).getFixedSize();

// While the definition of LLVM vectors is bitpacked, we don't support sizes
// that aren't byte sized.
if (ElementSize % 8)
return false;
assert((DL.getTypeSizeInBits(VTy).getFixedSize() % 8) == 0 &&
"vector size not a multiple of element size?");
ElementSize /= 8;

for (const Slice &S : P)
if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
return false;

for (const Slice *S : P.splitSliceTails())
if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
return false;

return true;
};
for (VectorType *VTy : CandidateTys)
if (CheckVectorTypeForPromotion(VTy))
if (checkVectorTypeForPromotion(P, VTy, DL))
return VTy;

return nullptr;
Expand Down Expand Up @@ -4246,26 +4251,45 @@ AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
Type *SliceTy = nullptr;
VectorType *SliceVecTy = nullptr;
const DataLayout &DL = AI.getModule()->getDataLayout();
std::pair<Type *, IntegerType *> CommonUseTy =
findCommonType(P.begin(), P.end(), P.endOffset());
// Do all uses operate on the same type?
if (CommonUseTy.first)
if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size())
if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size()) {
SliceTy = CommonUseTy.first;
SliceVecTy = dyn_cast<VectorType>(SliceTy);
}
// If not, can we find an appropriate subtype in the original allocated type?
if (!SliceTy)
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size()))
SliceTy = TypePartitionTy;

// If still not, can we use the largest bitwidth integer type used?
if (!SliceTy && CommonUseTy.second)
if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size())
if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size()) {
SliceTy = CommonUseTy.second;
SliceVecTy = dyn_cast<VectorType>(SliceTy);
}
if ((!SliceTy || (SliceTy->isArrayTy() &&
SliceTy->getArrayElementType()->isIntegerTy())) &&
DL.isLegalInteger(P.size() * 8))
DL.isLegalInteger(P.size() * 8)) {
SliceTy = Type::getIntNTy(*C, P.size() * 8);
}

// If the common use types are not viable for promotion then attempt to find
// another type that is viable.
if (SliceVecTy && !checkVectorTypeForPromotion(P, SliceVecTy, DL))
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
P.beginOffset(), P.size())) {
VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
if (TypePartitionVecTy &&
checkVectorTypeForPromotion(P, TypePartitionVecTy, DL))
SliceTy = TypePartitionTy;
}

if (!SliceTy)
SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
assert(DL.getTypeAllocSize(SliceTy).getFixedSize() >= P.size());
Expand Down
Loading

0 comments on commit 3cf23f7

Please sign in to comment.