diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 1d016e684c48f..258047e2b56fd 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4004,7 +4004,14 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, Index = clampVectorIndex(MIRBuilder, Index, VecTy); - LLT IdxTy = MRI.getType(Index); + // Convert index to the correct size for the address space. + const DataLayout &DL = MIRBuilder.getDataLayout(); + unsigned AS = MRI.getType(VecPtr).getAddressSpace(); + unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8; + LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits); + if (IdxTy != MRI.getType(Index)) + Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0); + auto Mul = MIRBuilder.buildMul(IdxTy, Index, MIRBuilder.buildConstant(IdxTy, EltSize)); diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index 1d0757c5d7f5f..b397aed76aa20 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1301,6 +1301,16 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { if (OffsetTy.isPointerOrPointerVector()) report("gep offset operand must not be a pointer", MI); + if (PtrTy.isPointerOrPointerVector()) { + const DataLayout &DL = MF->getDataLayout(); + unsigned AS = PtrTy.getAddressSpace(); + unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8; + if (OffsetTy.getScalarSizeInBits() != IndexSizeInBits) { + report("gep offset operand must match index size for address space", + MI); + } + } + // TODO: Is the offset allowed to be a scalar with a vector? break; } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir index 40e5e8ebb7731..1233a0af42453 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir @@ -11,7 +11,7 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(p64) = G_CONSTANT i64 44 ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[C]](p64) ; CHECK: $x0 = COPY [[PTRTOINT]](s64) - %1:_(s32) = G_CONSTANT i32 42 + %1:_(s64) = G_CONSTANT i64 42 %2:_(s32) = G_CONSTANT i32 2 %3:_(p64) = G_INTTOPTR %2 %4:_(p64) = G_PTR_ADD %3, %1 @@ -26,7 +26,7 @@ body: | ; CHECK-LABEL: name: agc.test_combine_ptradd_constants_ptrres ; CHECK: [[C:%[0-9]+]]:_(p64) = G_CONSTANT i64 44 ; CHECK: $x0 = COPY [[C]](p64) - %1:_(s32) = G_CONSTANT i32 42 + %1:_(s64) = G_CONSTANT i64 42 %2:_(s32) = G_CONSTANT i32 2 %3:_(p64) = G_INTTOPTR %2 %4:_(p64) = G_PTR_ADD %3, %1 @@ -39,12 +39,12 @@ body: | liveins: $x0, $x1 ; Ensure non-constant G_PTR_ADDs are not folded. ; CHECK-LABEL: name: agc.test_not_combine_variable_ptradd - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 ; CHECK: [[COPY:%[0-9]+]]:_(p64) = COPY $x1 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p64) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p64) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s64) = G_PTRTOINT [[PTR_ADD]](p64) ; CHECK: $x0 = COPY [[PTRTOINT]](s64) - %1:_(s32) = G_CONSTANT i32 42 + %1:_(s64) = G_CONSTANT i64 42 %2:_(p64) = COPY $x1 %3:_(p64) = G_PTR_ADD %2, %1 %4:_(s64) = G_PTRTOINT %3 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir index 7bd9725d0fc87..1ecd36b55380a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ptr-add.mir @@ -1,23 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=aarch64 -run-pass=legalizer %s -o - | FileCheck %s --- -name: test_ptr_add_small -body: | - bb.0.entry: - ; CHECK-LABEL: name: test_ptr_add_small - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY1]], 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s64) - ; CHECK: $x0 = COPY [[PTR_ADD]](p0) - %0:_(p0) = COPY $x0 - %1:_(s64) = COPY $x1 - %2:_(s8) = G_TRUNC %1(s64) - %3:_(p0) = G_PTR_ADD %0, %2(s8) - $x0 = COPY %3(p0) - -... ---- name: test_ptr_add_vec_p0 body: | bb.0.entry: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir index 88d214e43c82e..c30fab32fccbf 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-load-or-pattern.mir @@ -38,18 +38,18 @@ body: | ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %cst_1:_(s32) = G_CONSTANT i32 1 - %cst_2:_(s32) = G_CONSTANT i32 2 - %cst_3:_(s32) = G_CONSTANT i32 3 + %cst_1:_(s64) = G_CONSTANT i64 1 + %cst_2:_(s64) = G_CONSTANT i64 2 + %cst_3:_(s64) = G_CONSTANT i64 3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x1 - %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) + %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) @@ -104,18 +104,18 @@ body: | ; BIG: %full_load:_(s32) = G_LOAD %ptr(p0) :: (load (s32), align 1) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %cst_1:_(s32) = G_CONSTANT i32 1 - %cst_2:_(s32) = G_CONSTANT i32 2 - %cst_3:_(s32) = G_CONSTANT i32 3 + %cst_1:_(s64) = G_CONSTANT i64 1 + %cst_2:_(s64) = G_CONSTANT i64 2 + %cst_3:_(s64) = G_CONSTANT i64 3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x1 - %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) + %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) %elt0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) @@ -162,18 +162,18 @@ body: | ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %cst_1:_(s32) = G_CONSTANT i32 1 - %cst_2:_(s32) = G_CONSTANT i32 2 - %cst_3:_(s32) = G_CONSTANT i32 3 + %cst_1:_(s64) = G_CONSTANT i64 1 + %cst_2:_(s64) = G_CONSTANT i64 2 + %cst_3:_(s64) = G_CONSTANT i64 3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x1 - %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) + %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) @@ -414,35 +414,35 @@ body: | ; LITTLE-LABEL: name: nonzero_start_idx_positive_little_endian_pat ; LITTLE: liveins: $x0, $x1 - ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1 + ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 ; LITTLE: %ptr:_(p0) = COPY $x0 - ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) + ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: nonzero_start_idx_positive_little_endian_pat ; BIG: liveins: $x0, $x1 - ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1 + ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 ; BIG: %ptr:_(p0) = COPY $x0 - ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) + ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %cst_1:_(s32) = G_CONSTANT i32 1 - %cst_2:_(s32) = G_CONSTANT i32 2 - %cst_3:_(s32) = G_CONSTANT i32 3 - %cst_4:_(s32) = G_CONSTANT i32 4 + %cst_1:_(s64) = G_CONSTANT i64 1 + %cst_2:_(s64) = G_CONSTANT i64 2 + %cst_3:_(s64) = G_CONSTANT i64 3 + %cst_4:_(s64) = G_CONSTANT i64 4 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x0 - %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) - %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32) + %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) + %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s64) %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) %elt3:_(s32) = G_ZEXTLOAD %ptr_elt_3(p0) :: (load (s8)) @@ -476,35 +476,35 @@ body: | ; LITTLE-LABEL: name: nonzero_start_idx_positive_big_endian_pat ; LITTLE: liveins: $x0, $x1 - ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1 + ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 ; LITTLE: %ptr:_(p0) = COPY $x0 - ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) + ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]] ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: nonzero_start_idx_positive_big_endian_pat ; BIG: liveins: $x0, $x1 - ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1 + ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 ; BIG: %ptr:_(p0) = COPY $x0 - ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) + ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_1(p0) :: (load (s32), align 1) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %cst_1:_(s32) = G_CONSTANT i32 1 - %cst_2:_(s32) = G_CONSTANT i32 2 - %cst_3:_(s32) = G_CONSTANT i32 3 - %cst_4:_(s32) = G_CONSTANT i32 4 + %cst_1:_(s64) = G_CONSTANT i64 1 + %cst_2:_(s64) = G_CONSTANT i64 2 + %cst_3:_(s64) = G_CONSTANT i64 3 + %cst_4:_(s64) = G_CONSTANT i64 4 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x0 - %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) - %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s32) + %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) + %ptr_elt_4:_(p0) = G_PTR_ADD %ptr, %cst_4(s64) %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) @@ -538,33 +538,33 @@ body: | ; LITTLE-LABEL: name: nonzero_start_idx_negative_little_endian_pat ; LITTLE: liveins: $x0, $x1 - ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 + ; LITTLE: %cst_neg_3:_(s64) = G_CONSTANT i64 -3 ; LITTLE: %ptr:_(p0) = COPY $x0 - ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) + ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64) ; LITTLE: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: nonzero_start_idx_negative_little_endian_pat ; BIG: liveins: $x0, $x1 - ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 + ; BIG: %cst_neg_3:_(s64) = G_CONSTANT i64 -3 ; BIG: %ptr:_(p0) = COPY $x0 - ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) + ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64) ; BIG: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; BIG: %full_load:_(s32) = G_BSWAP [[LOAD]] ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %cst_neg_1:_(s32) = G_CONSTANT i32 -1 - %cst_neg_2:_(s32) = G_CONSTANT i32 -2 - %cst_neg_3:_(s32) = G_CONSTANT i32 -3 + %cst_neg_1:_(s64) = G_CONSTANT i64 -1 + %cst_neg_2:_(s64) = G_CONSTANT i64 -2 + %cst_neg_3:_(s64) = G_CONSTANT i64 -3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x0 - %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) - %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32) - %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32) + %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64) + %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s64) + %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s64) %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8)) %elt_neg_1:_(s32) = G_ZEXTLOAD %ptr_elt_neg_1(p0) :: (load (s8)) @@ -598,33 +598,33 @@ body: | ; LITTLE-LABEL: name: nonzero_start_idx_negative_big_endian_pat ; LITTLE: liveins: $x0, $x1 - ; LITTLE: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 + ; LITTLE: %cst_neg_3:_(s64) = G_CONSTANT i64 -3 ; LITTLE: %ptr:_(p0) = COPY $x0 - ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) + ; LITTLE: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64) ; LITTLE: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; LITTLE: %full_load:_(s32) = G_BSWAP [[LOAD]] ; LITTLE: $w1 = COPY %full_load(s32) ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: nonzero_start_idx_negative_big_endian_pat ; BIG: liveins: $x0, $x1 - ; BIG: %cst_neg_3:_(s32) = G_CONSTANT i32 -3 + ; BIG: %cst_neg_3:_(s64) = G_CONSTANT i64 -3 ; BIG: %ptr:_(p0) = COPY $x0 - ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) + ; BIG: %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64) ; BIG: %full_load:_(s32) = G_LOAD %ptr_elt_neg_3(p0) :: (load (s32), align 1) ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %cst_neg_1:_(s32) = G_CONSTANT i32 -1 - %cst_neg_2:_(s32) = G_CONSTANT i32 -2 - %cst_neg_3:_(s32) = G_CONSTANT i32 -3 + %cst_neg_1:_(s64) = G_CONSTANT i64 -1 + %cst_neg_2:_(s64) = G_CONSTANT i64 -2 + %cst_neg_3:_(s64) = G_CONSTANT i64 -3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x0 - %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s32) - %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s32) - %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s32) + %ptr_elt_neg_3:_(p0) = G_PTR_ADD %ptr, %cst_neg_3(s64) + %ptr_elt_neg_2:_(p0) = G_PTR_ADD %ptr, %cst_neg_2(s64) + %ptr_elt_neg_1:_(p0) = G_PTR_ADD %ptr, %cst_neg_1(s64) %elt_neg_3:_(s32) = G_ZEXTLOAD %ptr_elt_neg_3(p0) :: (load (s8)) %elt_neg_2:_(s32) = G_ZEXTLOAD %ptr_elt_neg_2(p0) :: (load (s8)) @@ -977,15 +977,15 @@ body: | ; LITTLE-LABEL: name: dont_combine_duplicate_idx ; LITTLE: liveins: $x0, $x1 - ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1 - ; LITTLE: %reused_idx:_(s32) = G_CONSTANT i32 2 + ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 + ; LITTLE: %reused_idx:_(s64) = G_CONSTANT i64 2 ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - ; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) - ; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) + ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + ; LITTLE: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) + ; LITTLE: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8)) @@ -1000,15 +1000,15 @@ body: | ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: dont_combine_duplicate_idx ; BIG: liveins: $x0, $x1 - ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1 - ; BIG: %reused_idx:_(s32) = G_CONSTANT i32 2 + ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 + ; BIG: %reused_idx:_(s64) = G_CONSTANT i64 2 ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - ; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) - ; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) + ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + ; BIG: %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) + ; BIG: %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) ; BIG: %elt2:_(s32) = G_ZEXTLOAD %uses_idx_2(p0) :: (load (s8)) @@ -1021,17 +1021,17 @@ body: | ; BIG: %full_load:_(s32) = G_OR %or1, %or2 ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %cst_1:_(s32) = G_CONSTANT i32 1 - %reused_idx:_(s32) = G_CONSTANT i32 2 + %cst_1:_(s64) = G_CONSTANT i64 1 + %reused_idx:_(s64) = G_CONSTANT i64 2 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x1 - %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) - %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s32) + %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + %uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) + %also_uses_idx_2:_(p0) = G_PTR_ADD %ptr, %reused_idx(s64) %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) @@ -1064,15 +1064,15 @@ body: | ; LITTLE-LABEL: name: dont_combine_duplicate_offset ; LITTLE: liveins: $x0, $x1 - ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1 - ; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2 - ; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3 + ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 + ; LITTLE: %cst_2:_(s64) = G_CONSTANT i64 2 + ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3 ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8 ; LITTLE: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) + ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) ; LITTLE: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) @@ -1087,15 +1087,15 @@ body: | ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: dont_combine_duplicate_offset ; BIG: liveins: $x0, $x1 - ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1 - ; BIG: %cst_2:_(s32) = G_CONSTANT i32 2 - ; BIG: %cst_3:_(s32) = G_CONSTANT i32 3 + ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 + ; BIG: %cst_2:_(s64) = G_CONSTANT i64 2 + ; BIG: %cst_3:_(s64) = G_CONSTANT i64 3 ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8 ; BIG: %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) + ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) ; BIG: %elt1:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) @@ -1108,17 +1108,17 @@ body: | ; BIG: %full_load:_(s32) = G_OR %or1, %or2 ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %cst_1:_(s32) = G_CONSTANT i32 1 - %cst_2:_(s32) = G_CONSTANT i32 2 - %cst_3:_(s32) = G_CONSTANT i32 3 + %cst_1:_(s64) = G_CONSTANT i64 1 + %cst_2:_(s64) = G_CONSTANT i64 2 + %cst_3:_(s64) = G_CONSTANT i64 3 %cst_8:_(s32) = G_CONSTANT i32 8 %duplicate_shl_cst:_(s32) = G_CONSTANT i32 16 %ptr:_(p0) = COPY $x1 - %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) + %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) %byte0:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) @@ -1153,16 +1153,16 @@ body: | ; LITTLE-LABEL: name: dont_combine_lowest_index_not_zero_offset ; LITTLE: liveins: $x0, $x1 - ; LITTLE: %cst_1:_(s32) = G_CONSTANT i32 1 - ; LITTLE: %cst_2:_(s32) = G_CONSTANT i32 2 - ; LITTLE: %cst_3:_(s32) = G_CONSTANT i32 3 + ; LITTLE: %cst_1:_(s64) = G_CONSTANT i64 1 + ; LITTLE: %cst_2:_(s64) = G_CONSTANT i64 2 + ; LITTLE: %cst_3:_(s64) = G_CONSTANT i64 3 ; LITTLE: %cst_8:_(s32) = G_CONSTANT i32 8 ; LITTLE: %cst_16:_(s32) = G_CONSTANT i32 16 ; LITTLE: %cst_24:_(s32) = G_CONSTANT i32 24 ; LITTLE: %ptr:_(p0) = COPY $x1 - ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) + ; LITTLE: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + ; LITTLE: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + ; LITTLE: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) ; LITTLE: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) ; LITTLE: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) ; LITTLE: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) @@ -1177,16 +1177,16 @@ body: | ; LITTLE: RET_ReallyLR implicit $w1 ; BIG-LABEL: name: dont_combine_lowest_index_not_zero_offset ; BIG: liveins: $x0, $x1 - ; BIG: %cst_1:_(s32) = G_CONSTANT i32 1 - ; BIG: %cst_2:_(s32) = G_CONSTANT i32 2 - ; BIG: %cst_3:_(s32) = G_CONSTANT i32 3 + ; BIG: %cst_1:_(s64) = G_CONSTANT i64 1 + ; BIG: %cst_2:_(s64) = G_CONSTANT i64 2 + ; BIG: %cst_3:_(s64) = G_CONSTANT i64 3 ; BIG: %cst_8:_(s32) = G_CONSTANT i32 8 ; BIG: %cst_16:_(s32) = G_CONSTANT i32 16 ; BIG: %cst_24:_(s32) = G_CONSTANT i32 24 ; BIG: %ptr:_(p0) = COPY $x1 - ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) + ; BIG: %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + ; BIG: %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + ; BIG: %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) ; BIG: %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) ; BIG: %byte0:_(s32) = G_ZEXTLOAD %ptr_elt_1(p0) :: (load (s8)) ; BIG: %elt2:_(s32) = G_ZEXTLOAD %ptr_elt_2(p0) :: (load (s8)) @@ -1199,18 +1199,18 @@ body: | ; BIG: %full_load:_(s32) = G_OR %or1, %or2 ; BIG: $w1 = COPY %full_load(s32) ; BIG: RET_ReallyLR implicit $w1 - %cst_1:_(s32) = G_CONSTANT i32 1 - %cst_2:_(s32) = G_CONSTANT i32 2 - %cst_3:_(s32) = G_CONSTANT i32 3 + %cst_1:_(s64) = G_CONSTANT i64 1 + %cst_2:_(s64) = G_CONSTANT i64 2 + %cst_3:_(s64) = G_CONSTANT i64 3 %cst_8:_(s32) = G_CONSTANT i32 8 %cst_16:_(s32) = G_CONSTANT i32 16 %cst_24:_(s32) = G_CONSTANT i32 24 %ptr:_(p0) = COPY $x1 - %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s32) - %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s32) - %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s32) + %ptr_elt_1:_(p0) = G_PTR_ADD %ptr, %cst_1(s64) + %ptr_elt_2:_(p0) = G_PTR_ADD %ptr, %cst_2(s64) + %ptr_elt_3:_(p0) = G_PTR_ADD %ptr, %cst_3(s64) ; This load is index 0 %lowest_idx_load:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s8)) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir index aa72a9ec06ede..b49f516098513 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-extract-vector-load.mir @@ -8,8 +8,9 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_ptradd_crash__offset_smaller - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[C]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1) ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %1:_(p1) = G_CONSTANT i64 0 @@ -27,8 +28,12 @@ tracksRegLiveness: true body: | bb.0: ; CHECK-LABEL: name: test_ptradd_crash__offset_wider - ; CHECK: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 12 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[C]](p1) :: (load (s32), addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 3 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[C]](s128) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[TRUNC]], [[C1]](s64) + ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p1) = G_INTTOPTR [[SHL]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[INTTOPTR]](p1) :: (load (s32), addrspace 1) ; CHECK-NEXT: $sgpr0 = COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %1:_(p1) = G_CONSTANT i64 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll index b58c3b2098636..43f3dcc86f426 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -11,9 +11,8 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v2, 63, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v2 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_dword v0, v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -28,10 +27,8 @@ define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX12-NEXT: v_and_b32_e32 v2, 63, v2 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v2, 2, v2 -; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX12-NEXT: global_load_b32 v0, v[0:1], off ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] @@ -46,9 +43,8 @@ define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_ushort v0, v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -63,10 +59,8 @@ define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX12-NEXT: v_and_b32_e32 v2, 0x7f, v2 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX12-NEXT: global_load_u16 v0, v[0:1], off ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] @@ -81,9 +75,8 @@ define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v2, 31, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 3, v2 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -98,10 +91,8 @@ define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX12-NEXT: v_and_b32_e32 v2, 31, v2 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v2, 3, v2 -; GFX12-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX12-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll index 057790617204c..e1ce9ea14a2a9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -6,37 +6,44 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { -; GCN-LABEL: extractelement_sgpr_v4i128_sgpr_idx: -; GCN: ; %bb.0: -; GCN-NEXT: s_and_b32 s0, s4, 3 -; GCN-NEXT: s_lshl_b32 s0, s0, 4 -; GCN-NEXT: s_ashr_i32 s1, s0, 31 -; GCN-NEXT: s_add_u32 s0, s2, s0 -; GCN-NEXT: s_addc_u32 s1, s3, s1 -; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: ; return to shader part epilog +; GFX9-LABEL: extractelement_sgpr_v4i128_sgpr_idx: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_and_b32 s0, s4, 3 +; GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: extractelement_sgpr_v4i128_sgpr_idx: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_and_b32 s0, s4, 3 +; GFX8-NEXT: s_lshl_b32 s0, s0, 4 +; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: ; return to shader part epilog +; +; GFX7-LABEL: extractelement_sgpr_v4i128_sgpr_idx: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_and_b32 s0, s4, 3 +; GFX7-NEXT: s_lshl_b32 s0, s0, 4 +; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_and_b32 s0, s4, 3 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4 -; GFX10-NEXT: s_ashr_i32 s1, s0, 31 -; GFX10-NEXT: s_add_u32 s0, s2, s0 -; GFX10-NEXT: s_addc_u32 s1, s3, s1 -; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], s0 offset:0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: ; return to shader part epilog ; ; GFX11-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_and_b32 s0, s4, 3 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_lshl_b32 s0, s0, 4 -; GFX11-NEXT: s_ashr_i32 s1, s0, 31 -; GFX11-NEXT: s_add_u32 s0, s2, s0 -; GFX11-NEXT: s_addc_u32 s1, s3, s1 -; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 +; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], s0 offset:0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog %vector = load <4 x i128>, ptr addrspace(4) %ptr @@ -48,8 +55,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr ; GFX9-LABEL: extractelement_vgpr_v4i128_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_and_b32 s0, s2, 3 +; GFX9-NEXT: s_mov_b32 s1, 0 ; GFX9-NEXT: s_lshl_b32 s0, s0, 4 -; GFX9-NEXT: s_ashr_i32 s1, s0, 31 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 @@ -65,8 +72,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr ; GFX8-LABEL: extractelement_vgpr_v4i128_sgpr_idx: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_and_b32 s0, s2, 3 +; GFX8-NEXT: s_mov_b32 s1, 0 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4 -; GFX8-NEXT: s_ashr_i32 s1, s0, 31 ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 @@ -82,10 +89,10 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr ; GFX7-LABEL: extractelement_vgpr_v4i128_sgpr_idx: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_and_b32 s0, s2, 3 +; GFX7-NEXT: s_mov_b32 s1, 0 ; GFX7-NEXT: s_lshl_b32 s0, s0, 4 -; GFX7-NEXT: s_ashr_i32 s1, s0, 31 -; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, s1 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readfirstlane_b32 s0, v0 @@ -97,8 +104,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr ; GFX10-LABEL: extractelement_vgpr_v4i128_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_and_b32 s0, s2, 3 +; GFX10-NEXT: s_mov_b32 s1, 0 ; GFX10-NEXT: s_lshl_b32 s0, s0, 4 -; GFX10-NEXT: s_ashr_i32 s1, s0, 31 ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -114,9 +121,8 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr ; GFX11-LABEL: extractelement_vgpr_v4i128_sgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_and_b32 s0, s2, 3 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_lshl_b32 s0, s0, 4 -; GFX11-NEXT: s_ashr_i32 s1, s0, 31 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -140,9 +146,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v2 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -152,9 +157,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v2 -; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -164,9 +168,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 4, v2 -; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 @@ -179,9 +182,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v2 -; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -192,10 +194,8 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx ; GFX11-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 4, v2 -; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -208,13 +208,8 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre ; GFX9-LABEL: extractelement_sgpr_v4i128_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 4, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: v_readfirstlane_b32 s1, v1 @@ -227,10 +222,9 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre ; GFX8-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 @@ -242,10 +236,10 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre ; GFX7-LABEL: extractelement_sgpr_v4i128_vgpr_idx: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX7-NEXT: s_mov_b32 s0, s2 ; GFX7-NEXT: s_mov_b32 s1, s3 -; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, 0 ; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 addr64 @@ -259,13 +253,8 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre ; GFX10-LABEL: extractelement_sgpr_v4i128_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 4, v0 -; GFX10-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: v_readfirstlane_b32 s1, v1 @@ -276,14 +265,9 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inre ; GFX11-LABEL: extractelement_sgpr_v4i128_vgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 4, v0 -; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX11-NEXT: global_load_b128 v[0:3], v0, s[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s1, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll index 6d772df3fa281..021f609053a0f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll @@ -10,11 +10,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg ; GFX9: ; %bb.0: ; GFX9-NEXT: s_and_b32 s0, s4, 3 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 -; GFX9-NEXT: s_ashr_i32 s1, s0, 31 -; GFX9-NEXT: s_add_u32 s0, s2, s0 -; GFX9-NEXT: s_addc_u32 s1, s3, s1 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: global_load_ushort v0, v0, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog @@ -23,9 +20,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg ; GFX8: ; %bb.0: ; GFX8-NEXT: s_and_b32 s0, s4, 3 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1 -; GFX8-NEXT: s_ashr_i32 s1, s0, 31 ; GFX8-NEXT: s_add_u32 s0, s2, s0 -; GFX8-NEXT: s_addc_u32 s1, s3, s1 +; GFX8-NEXT: s_addc_u32 s1, s3, 0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v0, v[0:1] @@ -38,11 +34,11 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg ; GFX7-NEXT: s_mov_b32 s0, s2 ; GFX7-NEXT: s_and_b32 s2, s4, 3 ; GFX7-NEXT: s_lshl_b32 s4, s2, 1 -; GFX7-NEXT: s_ashr_i32 s5, s4, 31 +; GFX7-NEXT: s_mov_b32 s5, 0 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: s_mov_b32 s1, s3 -; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, s5 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) @@ -52,12 +48,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg ; GFX10-LABEL: extractelement_sgpr_v4i16_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_and_b32 s0, s4, 3 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1 -; GFX10-NEXT: s_ashr_i32 s1, s0, 31 -; GFX10-NEXT: s_add_u32 s0, s2, s0 -; GFX10-NEXT: s_addc_u32 s1, s3, s1 -; GFX10-NEXT: global_load_ushort v0, v0, s[0:1] +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog @@ -65,13 +58,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg ; GFX11-LABEL: extractelement_sgpr_v4i16_sgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_and_b32 s0, s4, 3 -; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_ashr_i32 s1, s0, 31 -; GFX11-NEXT: s_add_u32 s0, s2, s0 -; GFX11-NEXT: s_addc_u32 s1, s3, s1 -; GFX11-NEXT: global_load_u16 v0, v0, s[0:1] +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog @@ -84,8 +74,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr, ; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_and_b32 s0, s2, 3 +; GFX9-NEXT: s_mov_b32 s1, 0 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 -; GFX9-NEXT: s_ashr_i32 s1, s0, 31 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 @@ -98,8 +88,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr, ; GFX8-LABEL: extractelement_vgpr_v4i16_sgpr_idx: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_and_b32 s0, s2, 3 +; GFX8-NEXT: s_mov_b32 s1, 0 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1 -; GFX8-NEXT: s_ashr_i32 s1, s0, 31 ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 @@ -112,10 +102,10 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr, ; GFX7-LABEL: extractelement_vgpr_v4i16_sgpr_idx: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_and_b32 s0, s2, 3 +; GFX7-NEXT: s_mov_b32 s1, 0 ; GFX7-NEXT: s_lshl_b32 s0, s0, 1 -; GFX7-NEXT: s_ashr_i32 s1, s0, 31 -; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, s1 ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readfirstlane_b32 s0, v0 @@ -124,8 +114,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr, ; GFX10-LABEL: extractelement_vgpr_v4i16_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_and_b32 s0, s2, 3 +; GFX10-NEXT: s_mov_b32 s1, 0 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1 -; GFX10-NEXT: s_ashr_i32 s1, s0, 31 ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -138,9 +128,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr, ; GFX11-LABEL: extractelement_vgpr_v4i16_sgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_and_b32 s0, s2, 3 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-NEXT: s_ashr_i32 s1, s0, 31 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -161,9 +150,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_ushort v0, v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -173,9 +161,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_ushort v0, v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -185,9 +172,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 @@ -200,9 +186,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -213,10 +198,8 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) ; GFX11-NEXT: v_and_b32_e32 v2, 3, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -229,13 +212,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg ; GFX9-LABEL: extractelement_sgpr_v4i16_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-NEXT: global_load_ushort v0, v[0:1], off +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog @@ -245,10 +223,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg ; GFX8-NEXT: v_and_b32_e32 v0, 3, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_ushort v0, v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 @@ -257,10 +234,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg ; GFX7-LABEL: extractelement_sgpr_v4i16_vgpr_idx: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX7-NEXT: s_mov_b32 s0, s2 ; GFX7-NEXT: s_mov_b32 s1, s3 -; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, 0 ; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 @@ -271,13 +248,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg ; GFX10-LABEL: extractelement_sgpr_v4i16_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v0 -; GFX10-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-NEXT: global_load_ushort v0, v[0:1], off +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog @@ -285,14 +257,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg ; GFX11-LABEL: extractelement_sgpr_v4i16_vgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_and_b32_e32 v0, 3, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v0 -; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog @@ -686,11 +653,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg ; GFX9: ; %bb.0: ; GFX9-NEXT: s_and_b32 s0, s4, 7 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 -; GFX9-NEXT: s_ashr_i32 s1, s0, 31 -; GFX9-NEXT: s_add_u32 s0, s2, s0 -; GFX9-NEXT: s_addc_u32 s1, s3, s1 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: global_load_ushort v0, v0, s[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog @@ -699,9 +663,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg ; GFX8: ; %bb.0: ; GFX8-NEXT: s_and_b32 s0, s4, 7 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1 -; GFX8-NEXT: s_ashr_i32 s1, s0, 31 ; GFX8-NEXT: s_add_u32 s0, s2, s0 -; GFX8-NEXT: s_addc_u32 s1, s3, s1 +; GFX8-NEXT: s_addc_u32 s1, s3, 0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_load_ushort v0, v[0:1] @@ -714,11 +677,11 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg ; GFX7-NEXT: s_mov_b32 s0, s2 ; GFX7-NEXT: s_and_b32 s2, s4, 7 ; GFX7-NEXT: s_lshl_b32 s4, s2, 1 -; GFX7-NEXT: s_ashr_i32 s5, s4, 31 +; GFX7-NEXT: s_mov_b32 s5, 0 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: s_mov_b32 s1, s3 -; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, s5 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) @@ -728,12 +691,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg ; GFX10-LABEL: extractelement_sgpr_v8i16_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_and_b32 s0, s4, 7 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1 -; GFX10-NEXT: s_ashr_i32 s1, s0, 31 -; GFX10-NEXT: s_add_u32 s0, s2, s0 -; GFX10-NEXT: s_addc_u32 s1, s3, s1 -; GFX10-NEXT: global_load_ushort v0, v0, s[0:1] +; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog @@ -741,13 +701,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg ; GFX11-LABEL: extractelement_sgpr_v8i16_sgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_and_b32 s0, s4, 7 -; GFX11-NEXT: v_mov_b32_e32 v0, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_ashr_i32 s1, s0, 31 -; GFX11-NEXT: s_add_u32 s0, s2, s0 -; GFX11-NEXT: s_addc_u32 s1, s3, s1 -; GFX11-NEXT: global_load_u16 v0, v0, s[0:1] +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog @@ -760,8 +717,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr, ; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_and_b32 s0, s2, 7 +; GFX9-NEXT: s_mov_b32 s1, 0 ; GFX9-NEXT: s_lshl_b32 s0, s0, 1 -; GFX9-NEXT: s_ashr_i32 s1, s0, 31 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 @@ -774,8 +731,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr, ; GFX8-LABEL: extractelement_vgpr_v8i16_sgpr_idx: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_and_b32 s0, s2, 7 +; GFX8-NEXT: s_mov_b32 s1, 0 ; GFX8-NEXT: s_lshl_b32 s0, s0, 1 -; GFX8-NEXT: s_ashr_i32 s1, s0, 31 ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 @@ -788,10 +745,10 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr, ; GFX7-LABEL: extractelement_vgpr_v8i16_sgpr_idx: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_and_b32 s0, s2, 7 +; GFX7-NEXT: s_mov_b32 s1, 0 ; GFX7-NEXT: s_lshl_b32 s0, s0, 1 -; GFX7-NEXT: s_ashr_i32 s1, s0, 31 -; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: s_mov_b32 s2, s1 ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_readfirstlane_b32 s0, v0 @@ -800,8 +757,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr, ; GFX10-LABEL: extractelement_vgpr_v8i16_sgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_and_b32 s0, s2, 7 +; GFX10-NEXT: s_mov_b32 s1, 0 ; GFX10-NEXT: s_lshl_b32 s0, s0, 1 -; GFX10-NEXT: s_ashr_i32 s1, s0, 31 ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -814,9 +771,8 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr, ; GFX11-LABEL: extractelement_vgpr_v8i16_sgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_and_b32 s0, s2, 7 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_lshl_b32 s0, s0, 1 -; GFX11-NEXT: s_ashr_i32 s1, s0, 31 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 @@ -837,9 +793,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: global_load_ushort v0, v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -849,9 +804,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_ushort v0, v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -861,9 +815,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GFX7-NEXT: s_mov_b32 s6, 0 ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 @@ -876,9 +829,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX10-NEXT: global_load_ushort v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -889,10 +841,8 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) ; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v2 -; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; GFX11-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -905,13 +855,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg ; GFX9-LABEL: extractelement_sgpr_v8i16_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_and_b32_e32 v0, 7, v0 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 1, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX9-NEXT: v_mov_b32_e32 v1, s3 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc -; GFX9-NEXT: global_load_ushort v0, v[0:1], off +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-NEXT: global_load_ushort v0, v0, s[2:3] ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s0, v0 ; GFX9-NEXT: ; return to shader part epilog @@ -921,10 +866,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg ; GFX8-NEXT: v_and_b32_e32 v0, 7, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s2 -; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 ; GFX8-NEXT: v_mov_b32_e32 v1, s3 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 -; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; GFX8-NEXT: flat_load_ushort v0, v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 @@ -933,10 +877,10 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg ; GFX7-LABEL: extractelement_sgpr_v8i16_vgpr_idx: ; GFX7: ; %bb.0: ; GFX7-NEXT: v_and_b32_e32 v0, 7, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0 ; GFX7-NEXT: s_mov_b32 s0, s2 ; GFX7-NEXT: s_mov_b32 s1, s3 -; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX7-NEXT: v_mov_b32_e32 v1, 0 ; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64 @@ -947,13 +891,8 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg ; GFX10-LABEL: extractelement_sgpr_v8i16_vgpr_idx: ; GFX10: ; %bb.0: ; GFX10-NEXT: v_and_b32_e32 v0, 7, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 1, v0 -; GFX10-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-NEXT: v_mov_b32_e32 v1, s3 -; GFX10-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX10-NEXT: global_load_ushort v0, v[0:1], off +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX10-NEXT: global_load_ushort v0, v0, s[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s0, v0 ; GFX10-NEXT: ; return to shader part epilog @@ -961,14 +900,9 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg ; GFX11-LABEL: extractelement_sgpr_v8i16_vgpr_idx: ; GFX11: ; %bb.0: ; GFX11-NEXT: v_and_b32_e32 v0, 7, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_lshlrev_b32_e32 v2, 1, v0 -; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 -; GFX11-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 -; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo -; GFX11-NEXT: global_load_u16 v0, v[0:1], off +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX11-NEXT: global_load_u16 v0, v0, s[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir index 660746c84287d..09e1109c36293 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ptr-add.mir @@ -205,210 +205,3 @@ body: | %2:_(<2 x p3>) = G_PTR_ADD %0, %1 $vgpr0_vgpr1 = COPY %2 ... - ---- -name: test_gep_global_s16_idx -body: | - bb.0: - liveins: $vgpr0_vgpr1, $vgpr2 - - ; CHECK-LABEL: name: test_gep_global_s16_idx - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY1]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ANYEXT]], 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1) - %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(s16) = G_TRUNC %1 - %3:_(p1) = G_PTR_ADD %0, %2 - $vgpr0_vgpr1 = COPY %3 -... - ---- -name: test_gep_global_s32_idx -body: | - bb.0: - liveins: $vgpr0_vgpr1, $vgpr2 - - ; CHECK-LABEL: name: test_gep_global_s32_idx - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[SEXT]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1) - %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s32) = COPY $vgpr2 - %2:_(p1) = G_PTR_ADD %0, %1 - $vgpr0_vgpr1 = COPY %2 -... - ---- -name: test_gep_global_s96_idx -body: | - bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 - - ; CHECK-LABEL: name: test_gep_global_s96_idx - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[TRUNC]](s64) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PTR_ADD]](p1) - %0:_(p1) = COPY $vgpr0_vgpr1 - %1:_(s96) = COPY $vgpr2_vgpr3_vgpr4 - %2:_(p1) = G_PTR_ADD %0, %1 - $vgpr0_vgpr1 = COPY %2 -... - ---- -name: test_gep_local_i16_idx -body: | - bb.0: - liveins: $vgpr0, $vgpr1 - - ; CHECK-LABEL: name: test_gep_local_i16_idx - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[SEXT_INREG]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3) - %0:_(p3) = COPY $vgpr0 - %1:_(s32) = COPY $vgpr1 - %2:_(s16) = G_TRUNC %1 - %3:_(p3) = G_PTR_ADD %0, %2 - $vgpr0 = COPY %3 -... - ---- -name: test_gep_local_i64_idx -body: | - bb.0: - liveins: $vgpr0, $vgpr1_vgpr2 - - ; CHECK-LABEL: name: test_gep_local_i64_idx - ; CHECK: liveins: $vgpr0, $vgpr1_vgpr2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr1_vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[TRUNC]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[PTR_ADD]](p3) - %0:_(p3) = COPY $vgpr0 - %1:_(s64) = COPY $vgpr1_vgpr2 - %2:_(p3) = G_PTR_ADD %0, %1 - $vgpr0 = COPY %2 -... - ---- -name: test_gep_v2p1_v2i32 -body: | - bb.0: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 - - ; CHECK-LABEL: name: test_gep_v2p1_v2i32 - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[UV2]](s32) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[SEXT]](s64) - ; CHECK-NEXT: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[UV3]](s32) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[SEXT1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) - %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 - %2:_(<2 x p1>) = G_PTR_ADD %0, %1 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 -... - ---- -name: test_gep_v2p1_v2i96 -body: | - bb.0: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6, $vgpr7_vgpr8_vgpr9 - - ; CHECK-LABEL: name: test_gep_v2p1_v2i96 - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6, $vgpr7_vgpr8_vgpr9 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY $vgpr7_vgpr8_vgpr9 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p1), [[UV1:%[0-9]+]]:_(p1) = G_UNMERGE_VALUES [[COPY]](<2 x p1>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[COPY1]](s96) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV]], [[TRUNC]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[COPY2]](s96) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[UV1]], [[TRUNC1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[PTR_ADD]](p1), [[PTR_ADD1]](p1) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x p1>) - %0:_(<2 x p1>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - %2:_(s96) = COPY $vgpr7_vgpr8_vgpr9 - %3:_(<2 x s96>) = G_BUILD_VECTOR %1, %2 - %4:_(<2 x p1>) = G_PTR_ADD %0, %3 - $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4 -... - ---- -name: test_gep_v2p3_v2s16 -body: | - bb.0: - liveins: $vgpr0_vgpr1, $vgpr2 - - ; CHECK-LABEL: name: test_gep_v2p3_v2s16 - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[SEXT_INREG]](s32) - ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[SEXT_INREG1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) - %0:_(<2 x p3>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s16>) = COPY $vgpr2 - %2:_(<2 x p3>) = G_PTR_ADD %0, %1 - $vgpr0_vgpr1 = COPY %2 -... - ---- -name: test_gep_v2p3_v2s64 -body: | - bb.0: - liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 - - ; CHECK-LABEL: name: test_gep_v2p3_v2s64 - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p3>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV]], [[TRUNC]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[UV1]], [[TRUNC1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[PTR_ADD]](p3), [[PTR_ADD1]](p3) - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x p3>) - %0:_(<2 x p3>) = COPY $vgpr0_vgpr1 - %1:_(<2 x s64>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5 - %2:_(<2 x p3>) = G_PTR_ADD %0, %1 - $vgpr0_vgpr1 = COPY %2 -... diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir index c1b1e2282254c..044ad60d1ae76 100644 --- a/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir +++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-load-store.mir @@ -9,7 +9,6 @@ define void @test_load_store_64_novfp() #1 { ret void } define void @test_gep_s32() { ret void } - define void @test_gep_s16() { ret void } attributes #0 = { "target-features"="+vfp2" } attributes #1 = { "target-features"="-vfp2sp" } @@ -211,30 +210,3 @@ body: | $r0 = COPY %2(p0) BX_RET 14, $noreg, implicit $r0 ... ---- -name: test_gep_s16 -# CHECK-LABEL: name: test_gep_s16 -legalized: false -# CHECK: legalized: true -regBankSelected: false -selected: false -tracksRegLiveness: true -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.0: - liveins: $r0 - - %0(p0) = COPY $r0 - %1(s16) = G_LOAD %0(p0) :: (load (s16)) - - ; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16) - ; CHECK: {{%[0-9]+}}:_(p0) = G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s32) - ; CHECK-NOT: G_PTR_ADD {{%[0-9]+}}, {{%[0-9]+}}(s16) - %2(p0) = G_PTR_ADD %0, %1(s16) - - $r0 = COPY %2(p0) - BX_RET 14, $noreg, implicit $r0 -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir new file mode 100644 index 0000000000000..584a400996e6a --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-32.mir @@ -0,0 +1,55 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK + +--- | + define void @test_gep_i32c(ptr %addr) { + %arrayidx = getelementptr i32, ptr undef, i32 5 + ret void + } + define void @test_gep_i32(ptr %addr, i32 %ofs) { + %arrayidx = getelementptr i32, ptr undef, i32 %ofs + ret void + } +... +--- +name: test_gep_i32c +legalized: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + ; CHECK-LABEL: name: test_gep_i32c + ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) + ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) + ; CHECK-NEXT: RET 0 + %0(p0) = IMPLICIT_DEF + %1(s32) = G_CONSTANT i32 20 + %2(p0) = G_PTR_ADD %0, %1(s32) + G_STORE %2, %0 :: (store (p0) into %ir.addr) + RET 0 +... +--- +name: test_gep_i32 +legalized: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + ; CHECK-LABEL: name: test_gep_i32 + ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = IMPLICIT_DEF + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s32) + ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) + ; CHECK-NEXT: RET 0 + %0(p0) = IMPLICIT_DEF + %1(s32) = IMPLICIT_DEF + %2(p0) = G_PTR_ADD %0, %1(s32) + G_STORE %2, %0 :: (store (p0) into %ir.addr) + RET 0 +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir new file mode 100644 index 0000000000000..7826257c21e58 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add-64.mir @@ -0,0 +1,55 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=X64 + +--- | + define void @test_gep_i64c(ptr %addr) { + %arrayidx = getelementptr i32, ptr undef, i64 5 + ret void + } + define void @test_gep_i64(ptr %addr, i64 %ofs) { + %arrayidx = getelementptr i32, ptr undef, i64 %ofs + ret void + } +... +--- +name: test_gep_i64c +legalized: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + ; X64-LABEL: name: test_gep_i64c + ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF + ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 + ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s64) + ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) + ; X64-NEXT: RET 0 + %0(p0) = IMPLICIT_DEF + %1(s64) = G_CONSTANT i64 20 + %2(p0) = G_PTR_ADD %0, %1(s64) + G_STORE %2, %0 :: (store (p0) into %ir.addr) + RET 0 +... +--- +name: test_gep_i64 +legalized: false +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +body: | + bb.1 (%ir-block.0): + ; X64-LABEL: name: test_gep_i64 + ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF + ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF + ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s64) + ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) + ; X64-NEXT: RET 0 + %0(p0) = IMPLICIT_DEF + %1(s64) = IMPLICIT_DEF + %2(p0) = G_PTR_ADD %0, %1(s64) + G_STORE %2, %0 :: (store (p0) into %ir.addr) + RET 0 +... diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir deleted file mode 100644 index b1beb2e98cc8d..0000000000000 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-ptr-add.mir +++ /dev/null @@ -1,224 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=x86_64-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X64 -# RUN: llc -mtriple=i386-linux-gnu -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,X86 - ---- | - define void @test_gep_i8c(ptr %addr) { - %arrayidx = getelementptr i32, ptr undef, i8 5 - ret void - } - define void @test_gep_i8(ptr %addr, i8 %ofs) { - %arrayidx = getelementptr i32, ptr undef, i8 %ofs - ret void - } - - define void @test_gep_i16c(ptr %addr) { - %arrayidx = getelementptr i32, ptr undef, i16 5 - ret void - } - define void @test_gep_i16(ptr %addr, i16 %ofs) { - %arrayidx = getelementptr i32, ptr undef, i16 %ofs - ret void - } - - define void @test_gep_i32c(ptr %addr) { - %arrayidx = getelementptr i32, ptr undef, i32 5 - ret void - } - define void @test_gep_i32(ptr %addr, i32 %ofs) { - %arrayidx = getelementptr i32, ptr undef, i32 %ofs - ret void - } - - define void @test_gep_i64c(ptr %addr) { - %arrayidx = getelementptr i32, ptr undef, i64 5 - ret void - } - define void @test_gep_i64(ptr %addr, i64 %ofs) { - %arrayidx = getelementptr i32, ptr undef, i64 %ofs - ret void - } -... ---- -name: test_gep_i8c -legalized: false -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_gep_i8c - ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) - ; CHECK-NEXT: RET 0 - %0(p0) = IMPLICIT_DEF - %1(s8) = G_CONSTANT i8 20 - %2(p0) = G_PTR_ADD %0, %1(s8) - G_STORE %2, %0 :: (store (p0) into %ir.addr) - RET 0 -... ---- -name: test_gep_i8 -legalized: false -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_gep_i8 - ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[DEF1]](s8) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[SEXT]](s32) - ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) - ; CHECK-NEXT: RET 0 - %0(p0) = IMPLICIT_DEF - %1(s8) = IMPLICIT_DEF - %2(p0) = G_PTR_ADD %0, %1(s8) - G_STORE %2, %0 :: (store (p0) into %ir.addr) - RET 0 -... ---- -name: test_gep_i16c -legalized: false -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_gep_i16c - ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) - ; CHECK-NEXT: RET 0 - %0(p0) = IMPLICIT_DEF - %1(s16) = G_CONSTANT i16 20 - %2(p0) = G_PTR_ADD %0, %1(s16) - G_STORE %2, %0 :: (store (p0) into %ir.addr) - RET 0 -... ---- -name: test_gep_i16 -legalized: false -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_gep_i16 - ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = IMPLICIT_DEF - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[DEF1]](s16) - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[SEXT]](s32) - ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) - ; CHECK-NEXT: RET 0 - %0(p0) = IMPLICIT_DEF - %1(s16) = IMPLICIT_DEF - %2(p0) = G_PTR_ADD %0, %1(s16) - G_STORE %2, %0 :: (store (p0) into %ir.addr) - RET 0 -... ---- -name: test_gep_i32c -legalized: false -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_gep_i32c - ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) - ; CHECK-NEXT: RET 0 - %0(p0) = IMPLICIT_DEF - %1(s32) = G_CONSTANT i32 20 - %2(p0) = G_PTR_ADD %0, %1(s32) - G_STORE %2, %0 :: (store (p0) into %ir.addr) - RET 0 -... ---- -name: test_gep_i32 -legalized: false -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - ; CHECK-LABEL: name: test_gep_i32 - ; CHECK: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = IMPLICIT_DEF - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s32) - ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) - ; CHECK-NEXT: RET 0 - %0(p0) = IMPLICIT_DEF - %1(s32) = IMPLICIT_DEF - %2(p0) = G_PTR_ADD %0, %1(s32) - G_STORE %2, %0 :: (store (p0) into %ir.addr) - RET 0 -... ---- -name: test_gep_i64c -legalized: false -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - ; X64-LABEL: name: test_gep_i64c - ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; X64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 20 - ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s64) - ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) - ; X64-NEXT: RET 0 - ; X86-LABEL: name: test_gep_i64c - ; X86: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; X86-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; X86-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) - ; X86-NEXT: RET 0 - %0(p0) = IMPLICIT_DEF - %1(s64) = G_CONSTANT i64 20 - %2(p0) = G_PTR_ADD %0, %1(s64) - G_STORE %2, %0 :: (store (p0) into %ir.addr) - RET 0 -... ---- -name: test_gep_i64 -legalized: false -registers: - - { id: 0, class: _ } - - { id: 1, class: _ } - - { id: 2, class: _ } -body: | - bb.1 (%ir-block.0): - ; X64-LABEL: name: test_gep_i64 - ; X64: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF - ; X64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[DEF1]](s64) - ; X64-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) - ; X64-NEXT: RET 0 - ; X86-LABEL: name: test_gep_i64 - ; X86: [[DEF:%[0-9]+]]:_(p0) = IMPLICIT_DEF - ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF - ; X86-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF1]](s64) - ; X86-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[DEF]], [[TRUNC]](s32) - ; X86-NEXT: G_STORE [[PTR_ADD]](p0), [[DEF]](p0) :: (store (p0) into %ir.addr) - ; X86-NEXT: RET 0 - %0(p0) = IMPLICIT_DEF - %1(s64) = IMPLICIT_DEF - %2(p0) = G_PTR_ADD %0, %1(s64) - G_STORE %2, %0 :: (store (p0) into %ir.addr) - RET 0 -... diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir index c2dcf30359248..03d4c7dd3281d 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-X86_64.mir @@ -1380,23 +1380,18 @@ body: | bb.0 (%ir-block.0): ; FAST-LABEL: name: test_gep ; FAST: [[DEF:%[0-9]+]]:gpr(p0) = G_IMPLICIT_DEF - ; FAST: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 20 - ; FAST: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; FAST: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20 - ; FAST: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; FAST: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20 + ; FAST: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s64) ; FAST: RET 0 + ; ; GREEDY-LABEL: name: test_gep ; GREEDY: [[DEF:%[0-9]+]]:gpr(p0) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 20 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s32) - ; GREEDY: [[C1:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20 - ; GREEDY: [[PTR_ADD1:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; GREEDY: [[C:%[0-9]+]]:gpr(s64) = G_CONSTANT i64 20 + ; GREEDY: [[PTR_ADD:%[0-9]+]]:gpr(p0) = G_PTR_ADD [[DEF]], [[C]](s64) ; GREEDY: RET 0 %0(p0) = G_IMPLICIT_DEF - %1(s32) = G_CONSTANT i32 20 - %2(p0) = G_PTR_ADD %0, %1(s32) - %3(s64) = G_CONSTANT i64 20 - %4(p0) = G_PTR_ADD %0, %3(s64) + %1(s64) = G_CONSTANT i64 20 + %2(p0) = G_PTR_ADD %0, %1(s64) RET 0 ... diff --git a/llvm/test/MachineVerifier/test_g_ptr_add.mir b/llvm/test/MachineVerifier/test_g_ptr_add.mir index 07fe6266701d5..7d1373586c8eb 100644 --- a/llvm/test/MachineVerifier/test_g_ptr_add.mir +++ b/llvm/test/MachineVerifier/test_g_ptr_add.mir @@ -1,4 +1,4 @@ -#RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# RUN: not --crash llc -o - -mtriple=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s # REQUIRES: aarch64-registered-target --- @@ -29,4 +29,8 @@ body: | ; CHECK: Bad machine code: gep first operand must be a pointer %6:_(s64) = G_PTR_ADD %1, %1 + %7:_(s32) = G_IMPLICIT_DEF + + ; CHECK: Bad machine code: gep offset operand must match index size for address space + %8:_(p0) = G_PTR_ADD %0, %7 ...