-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] PromoteAlloca: Handle load/store subvectors using non-constant indexes #71505
Conversation
…nt indexes I assumed indexes were always ConstantInts, but that's not always the case. They can be other things as well. We can easily handle that by just emitting an add and let InstSimplify do the constant folding for cases where it's really a ConstantInt. Solves SWDEV-429935
@llvm/pr-subscribers-backend-amdgpu Author: Pierre van Houtryve (Pierre-vh) ChangesI assumed indexes were always ConstantInts, but that's not always the case. They can be other things as well. We can easily handle that by just emitting an add and let InstSimplify do the constant folding for cases where it's really a ConstantInt. Solves SWDEV-429935 Patch is 23.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71505.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 3707a960211eb49..29591ddd669c95e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -386,7 +386,6 @@ static Value *promoteAllocaUserToVector(
};
Type *VecEltTy = VectorTy->getElementType();
- const unsigned NumVecElts = VectorTy->getNumElements();
switch (Inst->getOpcode()) {
case Instruction::Load: {
@@ -419,11 +418,12 @@ static Value *promoteAllocaUserToVector(
auto *SubVecTy = FixedVectorType::get(VecEltTy, NumLoadedElts);
assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));
- unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
Value *SubVec = PoisonValue::get(SubVecTy);
for (unsigned K = 0; K < NumLoadedElts; ++K) {
+ Value *CurIdx =
+ Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
SubVec = Builder.CreateInsertElement(
- SubVec, Builder.CreateExtractElement(CurVal, IndexVal + K), K);
+ SubVec, Builder.CreateExtractElement(CurVal, CurIdx), K);
}
if (AccessTy->isPtrOrPtrVectorTy())
@@ -479,12 +479,12 @@ static Value *promoteAllocaUserToVector(
Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
- unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
Value *CurVec = GetOrLoadCurrentVectorValue();
- for (unsigned K = 0; K < NumWrittenElts && ((IndexVal + K) < NumVecElts);
- ++K) {
+ for (unsigned K = 0; K < NumWrittenElts; ++K) {
+ Value *CurIdx =
+ Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
CurVec = Builder.CreateInsertElement(
- CurVec, Builder.CreateExtractElement(Val, K), IndexVal + K);
+ CurVec, Builder.CreateExtractElement(Val, K), CurIdx);
}
return CurVec;
}
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll
index 1262be9d36765b5..1e49500a243e100 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-loadstores.ll
@@ -43,9 +43,11 @@ define <4 x i64> @test_fullvec_out_of_bounds(<4 x i64> %arg) {
; CHECK-SAME: (<4 x i64> [[ARG:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i64 3
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> <i64 undef, i64 poison, i64 poison, i64 poison>, i64 [[TMP0]], i64 1
-; CHECK-NEXT: ret <4 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 3
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[ARG]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[ARG]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[ARG]], i64 3
+; CHECK-NEXT: ret <4 x i64> poison
;
entry:
%stack = alloca [4 x i64], align 4, addrspace(5)
@@ -159,9 +161,9 @@ define void @alloca_load_store_ptr_mixed_ptrvec(<2 x ptr addrspace(3)> %arg) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(3)> [[ARG]] to <2 x i32>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP3]], i64 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP3]], i64 1
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x ptr addrspace(3)>
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
index 1f09bc7bfda86bc..765aa250a48f4f0 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-subvecs.ll
@@ -8,17 +8,17 @@ define void @test_trivial_subvector(<2 x i64> %val.0, <2 x i64> %val.1) {
; CHECK-SAME: (<2 x i64> [[VAL_0:%.*]], <2 x i64> [[VAL_1:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[VAL_0]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i64 0
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[VAL_0]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP2]], i64 1
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP2]], i32 1
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[VAL_1]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP4]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[VAL_1]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[TMP6]], i64 2
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[TMP6]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[VAL_1]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> [[TMP7]], i64 [[TMP8]], i64 2
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> [[TMP7]], i64 [[TMP8]], i32 2
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[VAL_1]], i64 1
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> [[TMP9]], i64 [[TMP10]], i64 3
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> [[TMP9]], i64 [[TMP10]], i32 3
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP4]], i64 1
; CHECK-NEXT: [[DUMMYUSER:%.*]] = freeze <2 x i64> [[TMP13]]
@@ -56,36 +56,36 @@ define void @test_different_type_subvector(<4 x i32> %val.0, <8 x i16> %val.1, <
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VAL_0]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP3]], i64 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP3]], i64 1
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8>
; CHECK-NEXT: [[DUMMYUSER:%.*]] = freeze <16 x i8> [[TMP7]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[VAL_1]] to <2 x i64>
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[TMP9]], i64 1
+; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[TMP9]], i32 1
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP11]], i64 2
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP11]], i32 2
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i64 0
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i64> [[TMP13]], i64 [[TMP11]], i64 1
; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP14]] to <8 x i16>
; CHECK-NEXT: [[DUMMYUSE_1:%.*]] = freeze <8 x i16> [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[VAL_2]] to <2 x i64>
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP16]], i64 0
-; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP17]], i64 2
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP17]], i32 2
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i64> [[TMP16]], i64 1
-; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP19]], i64 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP19]], i32 3
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> poison, i64 [[TMP17]], i64 0
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> [[TMP21]], i64 [[TMP19]], i64 1
; CHECK-NEXT: [[TMP23:%.*]] = bitcast <2 x i64> [[TMP22]] to <4 x i32>
; CHECK-NEXT: [[DUMMYUSE_2:%.*]] = freeze <4 x i32> [[TMP23]]
; CHECK-NEXT: [[TMP24:%.*]] = bitcast <128 x i1> [[VAL_3]] to <2 x i64>
; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[TMP24]], i64 0
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP25]], i64 2
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP25]], i32 2
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i64> [[TMP24]], i64 1
-; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i64> [[TMP26]], i64 [[TMP27]], i64 3
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i64> [[TMP26]], i64 [[TMP27]], i32 3
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i64 0
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i64> [[TMP29]], i64 [[TMP25]], i64 1
; CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <128 x i1>
@@ -168,31 +168,31 @@ define void @test_different_type_subvector_fp(<2 x double> %val.0, <4 x float> %
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VAL_2]] to <2 x double>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> undef, double [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> undef, double [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP0]], i64 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP2]], double [[TMP3]], i64 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP2]], double [[TMP3]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[TMP3]], i64 1
; CHECK-NEXT: [[DUMMYUSER:%.*]] = freeze <2 x double> [[TMP6]]
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x float> [[VAL_1]] to <2 x double>
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x double> [[TMP4]], double [[TMP8]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x double> [[TMP4]], double [[TMP8]], i32 0
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP7]], i64 1
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> [[TMP9]], double [[TMP10]], i64 1
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> [[TMP9]], double [[TMP10]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> [[TMP12]], double [[TMP10]], i64 1
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x double> [[TMP13]] to <4 x float>
; CHECK-NEXT: [[DUMMYUSE_1:%.*]] = freeze <4 x float> [[TMP14]]
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[VAL_0]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP15]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP15]], i32 0
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x double> [[VAL_0]], i64 1
-; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x double> [[TMP16]], double [[TMP17]], i64 1
+; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x double> [[TMP16]], double [[TMP17]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> poison, double [[TMP15]], i64 0
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> [[TMP19]], double [[TMP17]], i64 1
; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x double> [[TMP20]] to <8 x half>
; CHECK-NEXT: [[DUMMYUSE_2:%.*]] = freeze <8 x half> [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x double> [[TMP18]], double 2.075080e-322, i64 0
-; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x double> [[TMP22]], double 3.162020e-322, i64 1
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x double> [[TMP18]], double 2.075080e-322, i32 0
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x double> [[TMP22]], double 3.162020e-322, i32 1
; CHECK-NEXT: [[DUMMYUSE_3:%.*]] = freeze <4 x i32> <i32 42, i32 0, i32 64, i32 0>
; CHECK-NEXT: ret void
;
@@ -224,9 +224,9 @@ define void @test_different_type_subvector_ptrs(<2 x ptr addrspace(1)> %val.0, <
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(1)> [[VAL_0]] to <2 x i64>
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
-; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP3]], i64 1
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP3]], i32 1
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP3]], i64 1
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr addrspace(1)>
@@ -234,9 +234,9 @@ define void @test_different_type_subvector_ptrs(<2 x ptr addrspace(1)> %val.0, <
; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint <4 x ptr addrspace(3)> [[VAL_1]] to <4 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <2 x i64>
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[TMP10]], i32 0
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP11]], i64 [[TMP12]], i64 1
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP11]], i64 [[TMP12]], i32 1
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i64 0
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i64> [[TMP14]], i64 [[TMP12]], i64 1
; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP15]] to <4 x i32>
@@ -265,13 +265,13 @@ define void @test_different_type_subvector_ptralloca(<2 x i64> %val.0, <8 x i16>
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VAL_0]] to <4 x i32>
; CHECK-NEXT: [[TMP1:%.*]] = inttoptr <4 x i32> [[TMP0]] to <4 x ptr addrspace(5)>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr addrspace(5)> undef, ptr addrspace(5) [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr addrspace(5)> undef, ptr addrspace(5) [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP1]], i64 1
-; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP3]], ptr addrspace(5) [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP3]], ptr addrspace(5) [[TMP4]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP1]], i64 2
-; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP5]], ptr addrspace(5) [[TMP6]], i64 2
+; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP5]], ptr addrspace(5) [[TMP6]], i32 2
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP1]], i64 3
-; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP7]], ptr addrspace(5) [[TMP8]], i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP7]], ptr addrspace(5) [[TMP8]], i32 3
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr addrspace(5)> poison, ptr addrspace(5) [[TMP2]], i64 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP10]], ptr addrspace(5) [[TMP4]], i64 1
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP11]], ptr addrspace(5) [[TMP6]], i64 2
@@ -282,13 +282,13 @@ define void @test_different_type_subvector_ptralloca(<2 x i64> %val.0, <8 x i16>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[VAL_1]] to <4 x i32>
; CHECK-NEXT: [[TMP17:%.*]] = inttoptr <4 x i32> [[TMP16]] to <4 x ptr addrspace(5)>
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP17]], i64 0
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP9]], ptr addrspace(5) [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP9]], ptr addrspace(5) [[TMP18]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP17]], i64 1
-; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP19]], ptr addrspace(5) [[TMP20]], i64 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP19]], ptr addrspace(5) [[TMP20]], i32 1
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP17]], i64 2
-; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP21]], ptr addrspace(5) [[TMP22]], i64 2
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP21]], ptr addrspace(5) [[TMP22]], i32 2
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP17]], i64 3
-; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP23]], ptr addrspace(5) [[TMP24]], i64 3
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP23]], ptr addrspace(5) [[TMP24]], i32 3
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x ptr addrspace(5)> poison, ptr addrspace(5) [[TMP18]], i64 0
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP26]], ptr addrspace(5) [[TMP20]], i64 1
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP27]], ptr addrspace(5) [[TMP22]], i64 2
@@ -299,9 +299,9 @@ define void @test_different_type_subvector_ptralloca(<2 x i64> %val.0, <8 x i16>
; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint <2 x ptr addrspace(3)> [[VAL_2]] to <2 x i32>
; CHECK-NEXT: [[TMP33:%.*]] = inttoptr <2 x i32> [[TMP32]] to <2 x ptr addrspace(5)>
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x ptr addrspace(5)> [[TMP33]], i64 0
-; CHECK-NEXT: [[TMP35:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP25]], ptr addrspace(5) [[TMP34]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP25]], ptr addrspace(5) [[TMP34]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x ptr addrspace(5)> [[TMP33]], i64 1
-; CHECK-NEXT: [[TMP37:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP35]], ptr addrspace(5) [[TMP36]], i64 1
+; CHECK-NEXT: [[TMP37:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP35]], ptr addrspace(5) [[TMP36]], i32 1
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x ptr addrspace(5)> poison, ptr addrspace(5) [[TMP34]], i64 0
; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x ptr addrspace(5)> [[TMP38]], ptr addrspace(5) [[TMP36]], i64 1
; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint <2 x ptr addrspace(5)> [[TMP39]] to <2 x i32>
@@ -332,7 +332,8 @@ define void @test_out_of_bounds_subvec(<2 x i64> %val) {
; CHECK-SAME: (<2 x i64> [[VAL:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[VAL]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64...
[truncated]
|
I assumed indexes were always ConstantInts, but that's not always the case. They can be other things as well. We can easily handle that by just emitting an add and let InstSimplify do the constant folding for cases where it's really a ConstantInt.
Solves SWDEV-429935