Skip to content

Commit b11838d

Browse files
committed
[InstCombine] Canonicalize gep T, (gep i8, base, C1), (Index +nsw C2)
1 parent 63d47da commit b11838d

File tree

2 files changed

+59
-19
lines changed

2 files changed

+59
-19
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2038,6 +2038,54 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
20382038
return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
20392039
}
20402040

2041+
// Canonicalization:
2042+
// gep T, (gep i8, base, C1), (Index +nsw C2) into
2043+
// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
2044+
static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
2045+
GEPOperator *Src,
2046+
InstCombinerImpl &IC) {
2047+
if (GEP.getNumIndices() != 1)
2048+
return nullptr;
2049+
auto &DL = IC.getDataLayout();
2050+
if (!Src->getSourceElementType()->isIntegerTy(8) ||
2051+
!Src->hasAllConstantIndices())
2052+
return nullptr;
2053+
Value *VarIndex;
2054+
const APInt *C2;
2055+
Type *PtrTy = Src->getType()->getScalarType();
2056+
unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy);
2057+
if (!(GEP.getOperand(1)->getType()->getScalarSizeInBits() >=
2058+
IndexSizeInBits &&
2059+
match(GEP.getOperand(1), m_Add(m_Value(VarIndex), m_APInt(C2)))) &&
2060+
!match(GEP.getOperand(1),
2061+
m_SExtOrSelf(
2062+
m_CombineOr(m_NSWAdd(m_Value(VarIndex), m_APInt(C2)),
2063+
m_DisjointOr(m_Value(VarIndex), m_APInt(C2))))))
2064+
return nullptr;
2065+
Type *BaseType = GEP.getSourceElementType();
2066+
APInt C1(IndexSizeInBits, 0);
2067+
// Add the offset for Src (which is fully constant).
2068+
if (!Src->accumulateConstantOffset(DL, C1))
2069+
return nullptr;
2070+
APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType));
2071+
bool Overflow = false;
2072+
APInt C3 = TypeSize.smul_ov(C2->sext(TypeSize.getBitWidth()), Overflow);
2073+
if (Overflow)
2074+
return nullptr;
2075+
APInt NewOffset = C1.sadd_ov(C3, Overflow);
2076+
if (Overflow)
2077+
return nullptr;
2078+
if (NewOffset.isZero() ||
2079+
(Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) {
2080+
Value *GEPConst =
2081+
IC.Builder.CreateGEP(IC.Builder.getInt8Ty(), Src->getPointerOperand(),
2082+
IC.Builder.getInt(NewOffset));
2083+
return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex);
2084+
}
2085+
2086+
return nullptr;
2087+
}
2088+
20412089
Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
20422090
GEPOperator *Src) {
20432091
// Combine Indices - If the source pointer to this getelementptr instruction
@@ -2046,6 +2094,9 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
20462094
if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
20472095
return nullptr;
20482096

2097+
if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
2098+
return I;
2099+
20492100
// For constant GEPs, use a more general offset-based folding approach.
20502101
Type *PtrTy = Src->getType()->getScalarType();
20512102
if (GEP.hasAllConstantIndices() &&

llvm/test/Transforms/InstCombine/gepofconstgepi8.ll

Lines changed: 8 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -11,9 +11,7 @@ define ptr @test_zero(ptr %base, i64 %a) {
1111
; CHECK-LABEL: define ptr @test_zero(
1212
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
1313
; CHECK-NEXT: entry:
14-
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
15-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]]
16-
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 1
14+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]]
1715
; CHECK-NEXT: ret ptr [[P2]]
1816
;
1917
entry:
@@ -27,9 +25,8 @@ define ptr @test_nonzero(ptr %base, i64 %a) {
2725
; CHECK-LABEL: define ptr @test_nonzero(
2826
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
2927
; CHECK-NEXT: entry:
30-
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
31-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]]
32-
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 2
28+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 4
29+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]]
3330
; CHECK-NEXT: ret ptr [[P2]]
3431
;
3532
entry:
@@ -43,9 +40,7 @@ define ptr @test_or_disjoint(ptr %base, i64 %a) {
4340
; CHECK-LABEL: define ptr @test_or_disjoint(
4441
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
4542
; CHECK-NEXT: entry:
46-
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
47-
; CHECK-NEXT: [[INDEX:%.*]] = or disjoint i64 [[A]], 1
48-
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]]
43+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]]
4944
; CHECK-NEXT: ret ptr [[P2]]
5045
;
5146
entry:
@@ -59,10 +54,9 @@ define ptr @test_zero_multiuse_index(ptr %base, i64 %a) {
5954
; CHECK-LABEL: define ptr @test_zero_multiuse_index(
6055
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
6156
; CHECK-NEXT: entry:
62-
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
6357
; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 1
6458
; CHECK-NEXT: call void @use64(i64 [[INDEX]])
65-
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]]
59+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]]
6660
; CHECK-NEXT: ret ptr [[P2]]
6761
;
6862
entry:
@@ -79,8 +73,7 @@ define ptr @test_zero_multiuse_ptr(ptr %base, i64 %a) {
7973
; CHECK-NEXT: entry:
8074
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
8175
; CHECK-NEXT: call void @useptr(ptr [[P1]])
82-
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]]
83-
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 1
76+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]]
8477
; CHECK-NEXT: ret ptr [[P2]]
8578
;
8679
entry:
@@ -95,10 +88,8 @@ define ptr @test_zero_sext_add_nsw(ptr %base, i32 %a) {
9588
; CHECK-LABEL: define ptr @test_zero_sext_add_nsw(
9689
; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) {
9790
; CHECK-NEXT: entry:
98-
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
9991
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[A]] to i64
100-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]]
101-
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 1
92+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[TMP0]]
10293
; CHECK-NEXT: ret ptr [[P2]]
10394
;
10495
entry:
@@ -112,10 +103,8 @@ define ptr @test_zero_trunc_add(ptr %base, i128 %a) {
112103
; CHECK-LABEL: define ptr @test_zero_trunc_add(
113104
; CHECK-SAME: ptr [[BASE:%.*]], i128 [[A:%.*]]) {
114105
; CHECK-NEXT: entry:
115-
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
116106
; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[A]] to i64
117-
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]]
118-
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 1
107+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[TMP0]]
119108
; CHECK-NEXT: ret ptr [[P2]]
120109
;
121110
entry:

0 commit comments

Comments
 (0)